before you start: .rs.restartR()
GitClonePath <- "/Users/admin/Desktop/ElbeEstuarineZander"
setwd(GitClonePath)
# R package management tools such as packrat or renv. These tools allow you to create isolated environments with specific versions of R and installed packages, ensuring reproducibility and avoiding conflicts with other packages installed in your system.
#install.packages("packrat")
#packrat::init()
#packrat::restore()
Most of the packages needed here are stored in the packrat::restore
Date <- "16.01.2024"
Species <- "SL"
Year <- "2021"
Season <- "Summer"
Analysis <- "WGCNA"
################
#Bacteria Input#
pslist <- readRDS(file.path(path_Output_16S,paste(paste( "SL_2021_Summer_16S_merge_Filter_ASV_besttax", Date, sep="_"), ".rds", sep="")))
pslistraw <- readRDS(file.path(path_Output_16S,paste(paste("SL_2021_Summer_ps_16S_merge_pslistraw", Date, sep="_"), ".rds", sep="")))
##################################################
#AnnotationFile Created in Sections 1.1.3 - 1.1.5#
SLUCGeneManual <- read.csv2(file.path(path_Input_RNA, "SLUCGene-Manual-17.06.2023.csv"))[-1]
#############
#Sample File#
SAMDF<- read.table(file=file.path(path_Input_RNA, "SL_samplefile_04.01.2024.csv") ,sep=";",dec = ".")
#####################
#Count and TPM files#
cnt_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_countsNoRibo_08.01.24.rds"))
cnt_RNA_Liver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_countsNoRibo_08.01.24.rds"))
tpm_RNA_Liver <- readRDS(file.path(path_Output_RNA, "SL_Liver_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpm_RNA_Gill <- readRDS(file.path(path_Output_RNA, "SL_Gill_featurecounts_tpm_NoRibo_08.01.24.rds"))
tpms <- list("tpmGill "= tpm_RNA_Gill, "tpmLiver" = tpm_RNA_Liver)
################################################
#Assign the SL_RNA Output to Global Environment#
save_name_RNA_Deseq2 <- "SL_2021_Summer" #from SL_RNA_16.01.24_Paper.Rmd
DeseqLFC005 <- readRDS(file = paste0(file.path(path_Output_RNA, paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "0.05", sep=""), Date, sep="_"), ".rds", sep=""))))
DeseqLFC1 <- readRDS(file = paste0(file.path(path_Output_RNA, paste(paste(paste(save_name_RNA_Deseq2, "DEG_Replicates_Outlier_pairwise", sep="_"), paste("LFC", "1", sep=""), Date, sep="_"), ".rds", sep=""))))
traitData<- c(
"FultonK", "Length","Weight",
"StomachContent",
"HSI", "SSI",
"O2","Salinity","SecchiDepth")
#Sample Files
library(readxl)
library(tidyverse)
library(ggplot2)
SAMDF_16S<-dplyr::filter(SAMDF, DNA16S == "yes")
rownames(SAMDF_16S) <- SAMDF_16S$SampleID
# Get all IDs present
SAMDF_RNA_Gill<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Gill) <- SAMDF_RNA_Gill$SampleID
SAMDF_RNA_Liver<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Liver) <- SAMDF_RNA_Liver$SampleID
SAMDF_RNA_Consensus<-dplyr::filter(SAMDF, RNAseq == "yes")
rownames(SAMDF_RNA_Consensus) <- SAMDF_RNA_Consensus$SampleID
OutlineColor <- "grey20" #"white"
´ ## 0.6 Tutorials
#-
##- Figure 2C
Abiotics<-c("O2","Salinity", "SecchiDepth", "Temperature", "pH") #, "Salinity", )
AbioticsColors <- c("O2" ="#3399FF", "Salinity" = "#FFC333", "SecchiDepth"= "#CC6633", "Temperature" = "#FF0000", "pH" ="#999333")
require(ggrepel)
df <- na.omit(SAMDF[,c(Abiotics, "Loc", "Season")][SAMDF$Season == "Elbe_Summer_21",]%>% distinct(Loc, .keep_all = TRUE))
df$Ekm <- c("Ekm-713", "Ekm-692", "Ekm-665", "Ekm-633")
df$Locs <-c("MG", "BB", "SS", "ML")
df_long <- df %>% gather(Abiotics, Value, O2:pH)
df_long$fLoc <- as.numeric(factor(df_long$Loc, levels = unique(df_long$Loc)))
AbioticsLinePlot <- ggplot(df_long, aes(x = fLoc, y = Value, color = Abiotics, label = round(Value, 2))) +
geom_line() +
geom_point(size=3) +
geom_text_repel(aes(y = Value + 0.3), size = 4, show.legend = FALSE, fontface = "bold") +
labs(x = "", y = "Abiotics", color = "Abiotics") +
theme_minimal() + #atheme +
#theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_x_continuous(breaks = df_long$fLoc, labels = paste(df_long$Ekm))+ #df_long$Locs,
scale_color_manual(values = AbioticsColors,
breaks = names(AbioticsColors),
labels = noquote(paste(names(AbioticsColors), " [", noquote(Units[names(AbioticsColors)]), "]",
sep = ""))) +
theme(axis.title.x = element_blank(),
axis.title.x.bottom = element_text(size=10,face = "bold"),
axis.title.y.left = element_text(size=10,face = "bold"),
strip.text.x = element_text(face = "bold"),
strip.text.x.bottom = element_text(size = 10,face = "bold"),
strip.text.y.left = element_text(size = 10,face = "bold"),
axis.text.x.bottom = element_text(face = "bold", angle = 0, hjust = 0),
axis.text.y.left = element_text(size = 10, face = "bold"),
legend.title = element_blank(),
legend.text = element_text(size = 10,face = "bold"),
axis.text.x = element_text(size=10, angle = 0),
plot.title = element_text(size = 10, face = "bold"),
plot.subtitle = element_text(size = 9),
plot.caption = element_text(size = 9))
plot(AbioticsLinePlot)
ggsave(AbioticsLinePlot, filename = paste(paste(Species, Year, Season, "Abiotics_Locs", sep="_"), ".png", sep=""), path = pathPlots, device='png', dpi=300, width = 6.5,
height = 3.5)
set.seed(123)
Species <- "SL"
Tissue <- "Gill"
Year <- "2021"
Season <- "Summer"
Type <- "16S"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "SSU-"
save_name_16S <- paste(Species, Year, Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA), save_name_16S, "_.RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNASL_2021_Summer_Gill_16S_WGCNA_.RData"
Samples_SSU_Gill <- c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Some comments on normalization: - Normalization: Strand et al., 2021 4.2.1. Data preprocessing Transcript expression counts were summed for each gene. Expression values were normalized across samples using the Trimmed mean of M values (TMM)-method [33], and log2- transformed. Genes with expression levels below 1.0 in all samples and/or with a standard deviation less than 0.15 were removed before network inference. This reduced the number of genes from 48,057 to 37,408. We retained OTUs that contribute at least 0.005% of the total microbial abundance. This reduced the number of OTUs from 1152 to 296 and the number of zeros in the abundance table by 50%. OTU abundances was normalized using the Cumulative Sum Scaling (CSS) method from the Bioconductor package metagenome- Seq, which by default log2-transforms the data.
Swift et al. 2021: Cumulative-Sum Score (CSS) uses robust statistics to provide an alternative to TSS that is less influenced by preferentially sampled taxa. Its normalization scaling factor is defined as the cumulative sum of the observed counts up to a threshold which is determined using a heuristic that minimizes the influence of the preferentially sampled taxa. The idea is that CSS scales each sample using only the part of the counts distribution that is relatively invariant (H. Lin &Peddada, 2020a; Weiss et al., 2017). However, the determination of the thresholds could fail due to high count variability (J. Chen et al., 2018). CSS or TSS do not account for sparsity or address compositionality.
The DESeq scaling factor of the observed abundances for each sample is computed as the median of all the ratios between counts of the sample and the reference. Assuming most taxa are not differentially abundant, the median of these ratios would provide an estimate of a correction factor for all the read counts. Like most normalizations, TMM and DESeq rely on the strong assumptions that most taxa are not differentially abundant, and of those differentially abundant, there is an approximate balanced amount of over-abundance and under-abundance (Calza & Pawitan, 2010; Dillies et al., 2012; Gloor et al., 2017; Weiss et al., 2017). These may be reasonable assumptions for gene expression data but may not be valid for microbiome data which tends to be diverse. Also, while TMM and DESEq normalization are designed for compositional data, they do not address sparsity. In fact, large fractions of zeros and relatively low sequencing depths can be problematic for both methods and can lead to serious biases (Kumar et al., 2018). As stated previously, adding pseudo-counts to the original count data will not rectify the problem.
Large scale tool benchmarking by Thorsen et al. has revealed that most of the commonly used differential (relative) abundance testing methods including edgeR and DESeq are sensitive to sparsity (Thorsen et al., 2016) and consequently exhibit unacceptably high false positive rates (Hawinkel et al., 2019). Also, recent investigations by H. Lin and Peddada (2020a, 2020b) indicate poor performance of these methods for microbiome data. However, based on simulations and analytical derivations, they attribute the poor performance to the violation of the assumption that most taxa do not change. As a result, the false discovery rate is inflated, and it increases as the sample size increases. Similar behavior of these methods was also seen by Weiss et al. (2017). Thus, H. Lin and Peddada (2020a) do not advocate the use of edgeR and DESeq for the analysis of microbiome data.
following http://mixomics.org/mixmc/mixmc-preprocessing/ Arumugam et al., (2011)
library(phyloseq)
library(tidyverse)
psraw <-pslistraw$ps_SL_21
ps <-pslist$ps_SL_21
low.count.removal <- function(
data, # OTU count df of size n (sample) x p (OTU)
percent=0.005 # cutoff chosen
) {
keep.otu = which(colSums(data)*100/(sum(colSums(data))) > percent)
data.filter = data[,keep.otu]
return(list(data.filter = data.filter, keep.otu = keep.otu))}
#######################################################
#Plot consequence of filtering from Strand et al, 2021#
#######################################################
frac_zero <- c()
all_sum <- c()
num_otu <- c()
seqp <- seq(0,0.1,0.0001)
for(i in seqp){
result.filter = low.count.removal(otu_table(psraw), percent=i)
length(result.filter$keep.otu)
otu.f = result.filter$data.filter
frac_zero <- c(frac_zero, sum(otu.f == 00)/ (ncol(otu.f)*nrow(otu.f)))
all_sum <- c(all_sum, sum(otu.f))
num_otu <- c(num_otu, ncol(otu.f))
}
names(frac_zero) <- seqp
names(all_sum) <- seqp
applied_filter <- 0.005
# Get all_sum and num_otu to a fraction of the total
mas <- max(all_sum)
mno <- max(num_otu)
all_sum %>% (function(x){x/max(x)}) -> all_sum
num_otu %>% (function(x){x/max(x)}) -> num_otu
data.frame(filter = seqp,
Percent.zeros = frac_zero*100,
Total.abundance = all_sum*100,
Number.of.OTUs = num_otu*100) %>%
tidyr::gather(key = "Type", value = "percent.of.total", -filter) %>%
ggplot() +
geom_line(mapping = aes(x = filter, y = percent.of.total, col = Type)) +
geom_vline(xintercept = applied_filter, alpha = 0.5, color = "red", linetype="dashed")+
ylab("Percent of total") +
xlab(paste("Filter at", applied_filter)) +
theme(legend.title = element_blank()) -> FilterZerosPlot
FilterZerosPlot
#The Filtering has been performed in the SL_16S.rmd
# ps <-pslist$ps_SL_21
# result.filter <- low.count.removal(otu_table(ps), percent=applied_filter)
# data.filter <- result.filter$data.filter
# length(result.filter$keep.otu) # check how many OTUs remain
#ps_SL_WGCNA <- prune_taxa(names(result.filter$keep.otu), ps)
####################
#CLR Transformation#
####################
ps_SL_WGCNA <- pslist$ps_SL_21
tail(t(otu_table(ps_SL_WGCNA)))
## OTU Table: [6 taxa and 24 samples]
## taxa are rows
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV13564:Paracaedibacteraceae 0 0 0 0
## ASV14092:Roseibacillus 0 0 0 0
## ASV14296:Acinetobacter 0 0 0 0
## ASV14326:Streptococcus 0 0 0 0
## ASV15061:Flavobacterium 0 0 0 0
## ASV15299:Achromobacter 0 0 0 0
## SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV13564:Paracaedibacteraceae 0 0 0 0
## ASV14092:Roseibacillus 0 0 0 0
## ASV14296:Acinetobacter 0 0 0 0
## ASV14326:Streptococcus 0 0 0 0
## ASV15061:Flavobacterium 0 0 0 0
## ASV15299:Achromobacter 0 0 0 0
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV13564:Paracaedibacteraceae 0 0 0 0
## ASV14092:Roseibacillus 0 0 0 35
## ASV14296:Acinetobacter 0 0 0 0
## ASV14326:Streptococcus 0 0 34 0
## ASV15061:Flavobacterium 0 0 0 0
## ASV15299:Achromobacter 0 0 0 0
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV13564:Paracaedibacteraceae 0 0 0 0
## ASV14092:Roseibacillus 0 0 0 0
## ASV14296:Acinetobacter 0 0 0 20
## ASV14326:Streptococcus 0 0 0 0
## ASV15061:Flavobacterium 0 0 1 0
## ASV15299:Achromobacter 0 0 0 21
## SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV13564:Paracaedibacteraceae 0 0 31 0
## ASV14092:Roseibacillus 0 0 0 0
## ASV14296:Acinetobacter 12 0 0 0
## ASV14326:Streptococcus 0 0 0 0
## ASV15061:Flavobacterium 0 0 26 0
## ASV15299:Achromobacter 8 0 0 0
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV13564:Paracaedibacteraceae 0 0 0 0
## ASV14092:Roseibacillus 0 0 0 0
## ASV14296:Acinetobacter 0 0 0 0
## ASV14326:Streptococcus 0 0 0 0
## ASV15061:Flavobacterium 0 0 4 0
## ASV15299:Achromobacter 0 0 0 0
table(rowSums(t(otu_table(ps_SL_WGCNA))))
##
## 28 29 30 31 32 33 34 35 36 37 38 39 40
## 28 21 28 26 25 21 20 26 15 11 11 8 18
## 41 42 43 44 45 46 47 48 49 50 51 52 53
## 18 17 18 13 10 14 14 13 7 8 10 8 7
## 54 55 56 57 58 59 60 61 62 63 64 65 66
## 7 7 11 10 5 8 5 8 12 8 11 2 5
## 67 68 69 70 72 73 74 75 76 77 78 79 80
## 6 6 8 10 7 1 4 13 4 12 4 3 4
## 81 82 83 84 85 86 87 88 89 90 91 92 93
## 5 4 3 10 4 4 6 8 3 1 3 2 2
## 94 96 97 98 99 100 101 102 103 104 105 106 107
## 2 3 2 2 3 1 3 3 3 2 2 3 2
## 108 109 111 112 113 114 116 117 118 119 120 121 122
## 4 4 1 4 4 5 2 1 2 1 1 4 1
## 123 124 125 126 127 128 129 130 132 133 134 135 136
## 2 1 4 2 2 4 2 1 1 2 1 2 2
## 137 138 139 140 141 142 143 144 145 146 147 148 149
## 2 1 1 3 2 3 2 1 1 1 3 3 3
## 150 151 152 153 154 155 156 157 159 160 162 163 164
## 2 1 1 4 2 2 2 4 1 1 3 1 3
## 166 167 168 169 170 171 172 174 175 178 180 181 182
## 1 2 1 3 3 1 3 3 1 1 1 1 1
## 184 186 187 188 189 191 193 194 195 196 197 198 201
## 1 1 2 3 2 1 1 1 2 1 3 2 4
## 202 204 206 208 210 212 213 215 216 217 218 219 223
## 1 1 1 1 1 2 1 1 1 2 1 1 2
## 227 228 230 232 233 236 238 239 240 242 243 252 253
## 1 1 1 1 1 1 1 1 1 1 1 1 2
## 255 259 262 264 265 267 270 274 277 279 282 283 284
## 1 3 1 1 1 1 1 2 1 3 1 1 1
## 285 286 287 290 291 296 297 299 309 310 312 313 320
## 1 1 1 1 1 2 2 1 1 1 1 1 1
## 324 325 327 328 329 332 333 334 336 340 343 344 346
## 2 1 2 2 1 1 1 1 1 1 1 1 1
## 352 354 355 356 359 361 362 363 365 369 370 375 377
## 1 1 2 1 1 1 1 3 2 1 2 2 1
## 386 395 397 398 400 410 419 420 421 434 440 443 450
## 1 1 1 1 1 1 1 1 1 1 2 1 1
## 451 459 463 467 469 473 480 488 493 497 499 515 516
## 2 1 1 1 1 1 1 1 1 1 1 1 1
## 519 527 533 543 557 566 573 593 600 604 607 613 614
## 2 1 1 1 1 1 1 1 1 1 1 1 1
## 615 621 624 636 640 642 647 667 668 669 671 677 685
## 1 2 1 1 1 1 1 1 1 1 1 1 1
## 688 689 690 692 712 719 720 721 730 740 767 778 800
## 1 2 1 1 1 1 1 1 1 1 1 1 1
## 804 820 826 848 863 866 868 871 884 891 897 923 925
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 938 940 953 984 991 1006 1056 1091 1098 1157 1182 1193 1246
## 1 1 1 1 1 1 1 1 2 1 1 1 1
## 1329 1331 1337 1343 1349 1361 1413 1417 1421 1433 1465 1522 1662
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1697 1701 1771 1878 1946 2012 2237 2303 2328 2716 2806 3050 3240
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3325 3372 3713 3780 3948 4194 4355 4478 4481 5154 5303 6348 6672
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 6741 7487 7812 8830 9113 9885 9901 10524 11192 11525 12945 13014 13659
## 1 1 1 1 1 1 1 1 1 1 1 1 1
## 20884 27811 68650
## 1 1 1
ps_SL_WGCNA_clr <- microbiome::transform(ps_SL_WGCNA, "clr")
head(t(otu_table(ps_SL_WGCNA_clr)))
## OTU Table: [6 taxa and 24 samples]
## taxa are rows
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV1:Elizabethkingia 8.600036 7.970279 6.624233 7.659307
## ASV2:Citrobacter 7.237126 6.672406 4.516563 5.855956
## ASV3:Enterobacteriaceae 7.040568 6.609335 4.303730 5.687514
## ASV4:Enterobacteriaceae 6.840890 6.396027 4.232044 5.562401
## ASV5:Enterobacter 6.646325 6.371862 4.032998 5.400742
## ASV6:Enterobacteriaceae 6.470677 6.213147 3.767479 5.325949
## SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV1:Elizabethkingia 9.483500 8.631171 9.448500 9.027291
## ASV2:Citrobacter 7.893771 7.058225 7.952625 7.522832
## ASV3:Enterobacteriaceae 8.016703 7.075600 8.045857 7.535790
## ASV4:Enterobacteriaceae 7.842953 6.693711 7.872213 7.498632
## ASV5:Enterobacter 7.500675 6.535178 7.299677 7.172404
## ASV6:Enterobacteriaceae 7.440153 6.522424 7.426611 7.068435
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV1:Elizabethkingia 7.633066 6.782400 8.621982 9.191411
## ASV2:Citrobacter 5.561344 4.543761 6.760025 7.625637
## ASV3:Enterobacteriaceae 5.398641 4.141420 6.992007 7.346226
## ASV4:Enterobacteriaceae 5.242239 4.543761 6.548012 7.352757
## ASV5:Enterobacter 5.100682 3.354286 6.442969 6.913629
## ASV6:Enterobacteriaceae 4.815751 3.108828 6.309059 6.798151
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV1:Elizabethkingia 8.645028 9.061274 7.871273 7.338069
## ASV2:Citrobacter 7.381247 7.625210 6.442742 4.999188
## ASV3:Enterobacteriaceae 7.343888 7.516458 6.401930 5.162813
## ASV4:Enterobacteriaceae 7.226915 7.384228 6.405396 4.667424
## ASV5:Enterobacter 7.042626 7.219807 6.081399 4.599605
## ASV6:Enterobacteriaceae 6.807887 7.068901 6.062082 4.607371
## SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV1:Elizabethkingia 7.626074 7.908607 7.623295 8.384012
## ASV2:Citrobacter 6.048266 5.790626 6.236401 6.939127
## ASV3:Enterobacteriaceae 5.910103 5.515858 6.269074 6.923891
## ASV4:Enterobacteriaceae 5.782935 5.651755 6.212878 7.154263
## ASV5:Enterobacter 5.490392 5.259835 5.881279 6.548759
## ASV6:Enterobacteriaceae 5.285756 5.415629 5.682809 6.526166
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV1:Elizabethkingia 7.606868 8.223416 8.899901 9.267608
## ASV2:Citrobacter 6.202312 6.913825 6.932687 7.149940
## ASV3:Enterobacteriaceae 6.110288 6.631920 6.910952 7.255673
## ASV4:Enterobacteriaceae 5.866606 6.804931 7.003572 7.333534
## ASV5:Enterobacter 5.809837 6.325696 6.551248 6.927333
## ASV6:Enterobacteriaceae 5.433320 6.343234 6.551248 6.634010
tail(t(otu_table(ps_SL_WGCNA_clr)))
## OTU Table: [6 taxa and 24 samples]
## taxa are rows
## SLSU21BBEB7 SLSU21MGEB7 SLSU21MLEB9 SLSU21SSEB9
## ASV13564:Paracaedibacteraceae -1.693732 -2.166058 -1.976644 -2.192358
## ASV14092:Roseibacillus -1.693732 -2.166058 -1.976644 -2.192358
## ASV14296:Acinetobacter -1.693732 -2.166058 -1.976644 -2.192358
## ASV14326:Streptococcus -1.693732 -2.166058 -1.976644 -2.192358
## ASV15061:Flavobacterium -1.693732 -2.166058 -1.976644 -2.192358
## ASV15299:Achromobacter -1.693732 -2.166058 -1.976644 -2.192358
## SLSU21BBEB1 SLSU21BBEB2 SLSU21BBEB4 SLSU21BBEB6
## ASV13564:Paracaedibacteraceae -1.163678 -1.809827 -1.126862 -1.418989
## ASV14092:Roseibacillus -1.163678 -1.809827 -1.126862 -1.418989
## ASV14296:Acinetobacter -1.163678 -1.809827 -1.126862 -1.418989
## ASV14326:Streptococcus -1.163678 -1.809827 -1.126862 -1.418989
## ASV15061:Flavobacterium -1.163678 -1.809827 -1.126862 -1.418989
## ASV15299:Achromobacter -1.163678 -1.809827 -1.126862 -1.418989
## SLSU21BBEB9 SLSU21MGEB1 SLSU21MGEB2 SLSU21MGEB3
## ASV13564:Paracaedibacteraceae -1.304547 -0.5300825 -1.796115 -1.226363
## ASV14092:Roseibacillus -1.304547 -0.5300825 -1.796115 4.268245
## ASV14296:Acinetobacter -1.304547 -0.5300825 -1.796115 -1.226363
## ASV14326:Streptococcus -1.304547 -0.5300825 4.133997 -1.226363
## ASV15061:Flavobacterium -1.304547 -0.5300825 -1.796115 -1.226363
## ASV15299:Achromobacter -1.304547 -0.5300825 -1.796115 -1.226363
## SLSU21MGEB4 SLSU21MGEB5 SLSU21MLEB1 SLSU21MLEB2
## ASV13564:Paracaedibacteraceae -1.527369 -1.376554 -1.8829827 -1.615445
## ASV14092:Roseibacillus -1.527369 -1.376554 -1.8829827 -1.615445
## ASV14296:Acinetobacter -1.527369 -1.376554 -1.8829827 2.754044
## ASV14326:Streptococcus -1.527369 -1.376554 -1.8829827 -1.615445
## ASV15061:Flavobacterium -1.527369 -1.376554 0.8088769 -1.615445
## ASV15299:Achromobacter -1.527369 -1.376554 -1.8829827 2.802231
## SLSU21MLEB5 SLSU21MLEB6 SLSU21MLEB7 SLSU21SSEB2
## ASV13564:Paracaedibacteraceae -1.779624 -1.271161 3.965944 -1.656262
## ASV14092:Roseibacillus -1.779624 -1.271161 -1.853857 -1.656262
## ASV14296:Acinetobacter 2.297964 -1.271161 -1.853857 -1.656262
## ASV14326:Streptococcus -1.779624 -1.271161 -1.853857 -1.656262
## ASV15061:Flavobacterium -1.779624 -1.271161 3.790624 -1.656262
## ASV15299:Achromobacter 1.900937 -1.271161 -1.853857 -1.656262
## SLSU21SSEB3 SLSU21SSEB5 SLSU21SSEB6 SLSU21SSEB7
## ASV13564:Paracaedibacteraceae -2.19878 -1.677103 -1.322530 -1.426711
## ASV14092:Roseibacillus -2.19878 -1.677103 -1.322530 -1.426711
## ASV14296:Acinetobacter -2.19878 -1.677103 -1.322530 -1.426711
## ASV14326:Streptococcus -2.19878 -1.677103 -1.322530 -1.426711
## ASV15061:Flavobacterium -2.19878 -1.677103 2.029985 -1.426711
## ASV15299:Achromobacter -2.19878 -1.677103 -1.322530 -1.426711
pslist$ps_WGCNA <- ps_SL_WGCNA
pslist$clr_WGCNA <- ps_SL_WGCNA_clr
######
#Plot#
######
for (i in names(pslist)[grepl("clr_WGCNA", names(pslist))]){
require(plyr)
require(ggrepel)
require(cowplot)
require(DESeq2)
#if (OperatingSystem == "Mac") {quartz() }
TSE <-mia::makeTreeSummarizedExperimentFromPhyloseq(pslist[[i]])
tryCatch({
g <- paste(i); print(i)
SSUclrPCAPlot<-pcaplotRK3(TSE,intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",ellipse = TRUE, ellipse.prob = 0.5) +
scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
scale_color_manual(values=col.Palette$SL) + atheme +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"))
prow <- cowplot::plot_grid(SSUclrPCAPlot, labels = c(""), ncol = 1)
title <- ggdraw() + draw_label_themeRKwhite(paste(g), element = "plot.title",x = 0.05, hjust = 0, vjust = 1)
subtitle <- ggdraw() + draw_label_themeRKwhite(paste("species_glom clr-PCA", "with", ... =
length(rownames(TSE)),"bacterial species",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,
vjust = 1)
SSUclrPCAPlot<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
ggsave(SSUclrPCAPlot, filename = paste(paste(save_name_16S, "SSU", "Filter-clrPCA" , sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 7,
height = 7)
SSUclrPCAPlot ->> SSUclrPCAPlot
plot(SSUclrPCAPlot)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "clr_WGCNA"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
#=====================================================================================
# Code chunk 3
#=====================================================================================
library(WGCNA)
omics_data0 <- as.data.frame(otu_table(ps_SL_WGCNA_clr)) #%>% t()
dim(omics_data0 %>% paste(c("Samples", "OTUs")))
## NULL
gsg = goodSamplesGenes(omics_data0, verbose = 3);
## Flagging genes and samples with too many missing values...
## ..step 1
gsg$allOK
## [1] TRUE
#=====================================================================================
# Code chunk 4
#=====================================================================================
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(omics_data0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(omics_data0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
}
#=====================================================================================
# Code chunk 5
#=====================================================================================
sampleTree = hclust(dist(omics_data0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
#=====================================================================================
# Code chunk 6
#=====================================================================================
# Plot a line to show the cut
abline(h = 100, col = "red");
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
table(clust)
## clust
## 1
## 24
# clust 1 contains the samples we want to keep.
#in Case we would exclude outliers from the tree:
#keepSamples = (clust==1)
#omics_data = omics_data0[keepSamples, ]
recordPlot() -> SampleClusteringTreePlot
#Keep all samples
omics_data <- omics_data0
nGenes = ncol(omics_data)
nSamples = nrow(omics_data)
#=====================================================================================
# Code chunk 7
#=====================================================================================
datTraits<-SAMDF_16S[,traitData]
datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
collectGarbage()
#=====================================================================================
# Code chunk 8
#=====================================================================================
# Re-cluster samples
sampleTree2 = hclust(dist(omics_data), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
recordPlot() -> SampleClusteringTreeTraitsPlot
#=====================================================================================
# Code chunk 9
#=====================================================================================
save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "dataInput", Date, sep="_"), ".RData", sep=""))))
##############
#Summary Plot#
##############
cowplot::plot_grid(FilterZerosPlot , SSUclrPCAPlot, labels = c("A", "B"), ncol = 2, rel_heights = c(1,1)) -> part_1
cowplot::plot_grid(part_1 , SampleClusteringTreeTraitsPlot, labels = c("", "C"), ncol = 1) -> part_2
ggsave(part_2, filename = paste(paste(save_name_16S, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 10,
height = 15)
Rosales et al., 2023 Network analysis To identify ASVs that co-associate in AH, DU, and DL samples, CLR-transformed counts were used for weighted correlation network analysis (WGCNA) with the WGCNA 1.70-3R package [76]. The network was constructed unsigned with the following parameters: power = 3, minimum module size = 12, deep split = 2, and merged cut height = 0.25. The eigenvalues were correlated to AH, DU, and DL using Pearson correlation with the R function cor. The highest correlation in each disease state was then selected for network construction using the R package SpiecEasi 1.0.5 [77]. The network was then constructed as previously reported [11]. Briefly, the Stability Approach to Regularization Selection (StARS) [77] model was chosen along with the method Meinshausen–Bühlmann’s neighborhood selection [78]. For StARS, 100 subsamples were used with a variability threshold of 10−3. The centrality (node importance) was evaluated [79] using the functions centrality_degree (neighbors = the number of adjacent edges or neighbors) and centrality_edge_betweenness (centrality = the number of shortest paths going through an edge) [80]. The package influenceR 0.1.0. [81] selected important ASVs in each network and assigned the top “key players” [38], which were labeled with their respective orders.
Jameson et al., 2023 Co-occurrence patterns between taxa with putative roles in N2O production and the rest of the microbial community ASVs were explored using proportionality analysis within the propr package52. ASV tables were trimmed to select taxa that occurred ≥10 times in at least 10% of samples prior to network-level analyses to improve interpretability and minimize the risk of spurious correlations. Pairwise interactions between individual taxa with rho values greater than 0.60 were plotted using Cytoscape v3.9.0 and network topological indices were calculated using the NetworkAnalyzer tool114. Relationships between microbial community structure and rate processes were then assessed using weighted gene correlational network analysis (WGCNA) performed with the WGCNA package115. The signed adjacency measure was first calculated for each pair of features (ASVs) by raising the absolute value of their pairwise correlation coefficients to a soft-thresholding power of 8 to maximize the scale-free topology fit. Hierarchical clustering of taxa into discrete subnetworks was completed using a minimum module size threshold of 20 and a dissimilarity threshold of 0.3. Pearson correlation coefficients and corresponding p-values are reported for correlations between sample traits, subnetwork eigengenes, and individual ASVs (Supplementary Data 1). Subnetwork membership and intranetwork connectivity measures are also reported for each ASV and were used in further analyses to assess broad relationships between ASV connectivity and importance with respect to N2O production rates.
Mach et al., 2021 To confirm the results of the DE analysis, the weighted gene co-expression network analysis (WGCNA) method was also run on the “E1” matrix using the WGCNA R package (version 1.69) (Langfelder and Horvath, 2008). The parameters for the analysis were set as follows: “corFnc” = bicor, “type” = signed hybrid, “beta” = 10, “deepSplit” = 4, “minClusterSize” = 30, “cutHeight” = 0.1. The eigengenes corresponding to each identified module were correlated individually to all the 1H NMR and biochemical assay metabolites, i.e., a set of 56 different molecules (see next paragraphs). A module was then considered positively or negatively associated to this set of molecules if the Pearson r correlation values were ≥ |0.65| for at least 5 molecules and if all the corresponding p-values were ≤ 1e−05. The positively and the negatively correlated modules defined in this way were merged to obtain a single gene list, which was subsequently compared to the differentially expressed genes (DEGs) list using a Venn diagram.
Strand et al, 2021 4.2.2. Network inference For network inference, we used the Weighted Gene Co-expression Network Analysis (WGCNA) R package [24] and the function blockwiseModules with the bicor correlation measure and parameters maxBlockSize = 10000, networkType = “signed”, TOMType = “signed”, corType = “bicor”, maxPOutliers = 0.05, replaceMissingAdjacencies = TRUE, pamStage = F, deepSplit = 4, minModuleSize = 2, minKMEtoStay = 0.5, minCoreKME = 0.5, minCoreKMESize = 2, reassignThreshold = 0 mergeCutHeight = 0.4/0.5 (for microbiota/host respectively).
Our analysis relies heavily on network modules, and hence the parameters related to module detection and trimming influence the results. Briefly, deepSplit controls the sensitivity of the module detection approach by hierarchical clustering, with a value of 1 being the least sensitive and 4 being the most sensitive. minModuleSize controls the minimum size of modules in the clustering step. Nodes with a correlation to the module eigennode (KME) lower than minKMEtoStay are trimmed from the module, and the module is deleted if it does not have a core of at least minCoreKMESize nodes (with core nodes being defined as having a KME greater than minCoreKME). Finally, different modules with eigennodes that correlate above the 1 – mergeCutHeight threshold are merged. Note that the final modules can be smaller than minModuleSize due to trimming (but not smaller than minCoreKMESize), and that they can include nodes with a KME lower than minKMEtoStay due to module merging. Our parameters were set to detect highly correlated and potentially small modules initially, thus not missing interesting profiles displayed by few genes/OTUs, and then to apply an aggressive merging threshold to avoid dealing with highly redundant modules in downstream analysis.
#=====================================================================================
# Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
# Code chunk 2
#=====================================================================================
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1091.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1091 of 1091
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.410 -2.42 0.940 237.000 229.0000 385.0
## 2 2 0.637 -2.03 0.956 79.900 72.7000 184.0
## 3 3 0.750 -1.76 0.956 34.500 28.4000 103.0
## 4 4 0.819 -1.59 0.943 17.700 13.7000 63.6
## 5 5 0.840 -1.58 0.937 10.300 7.4100 45.4
## 6 6 0.901 -1.47 0.946 6.620 4.3500 34.2
## 7 7 0.937 -1.43 0.957 4.600 2.6300 27.1
## 8 8 0.920 -1.40 0.919 3.400 1.6700 23.1
## 9 9 0.905 -1.39 0.890 2.640 1.0900 20.2
## 10 10 0.953 -1.30 0.940 2.140 0.7460 17.9
## 11 12 0.892 -1.25 0.874 1.530 0.3700 15.2
## 12 14 0.913 -1.22 0.901 1.200 0.2030 14.3
## 13 16 0.879 -1.22 0.849 0.996 0.1130 13.7
## 14 18 0.863 -1.20 0.826 0.860 0.0645 13.2
## 15 20 0.892 -1.17 0.863 0.764 0.0384 12.9
# Plot the results:
# Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
# based on the criterion of approximate scale-free topology.
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
if(!is.infinite(idx)){
st <- sft$fitIndices[idx,1]
} else{
idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
st <- sft$fitIndices[idx,1]
}
} else{st <- sft$fitIndices[idx,1]}
# Plot Scale independence measure and Mean connectivity measure
# Scale-free topology fit index as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>%
ggplot() +
geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
ggtitle("Scale independence") +
xlab("Soft Threshold (power)") +
ylab("SF Model Fit,signed R^2") +
xlim(1,20) +
ylim(-1,1) +
geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.5)-> scale_independence_plot
# Mean connectivity as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
meanApprox = sft$fitIndices[,5]) %>%
ggplot() +
geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
xlab("Soft Threshold (power)") +
ylab("Mean Connectivity") +
geom_segment(aes(x = st-0.4,
y = sft$fitIndices$mean.k.[idx],
xend = 0,
yend = sft$fitIndices$mean.k.[idx]),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.4) +
ggtitle(paste0("Mean connectivity: ",
round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot
cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B")) -> si_mc_plot
ggsave(si_mc_plot, filename = paste(paste(save_name_16S, "soft_thresholding_power" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
si_mc_plot
softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
softPower <- 6
deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.
##################
#blockwiseModules#
##################
softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
network = blockwiseModules(omics_data,
power = st,
networkType = "signed",
TOMType = "signed",
corType = "bicor",
minModuleSize = 5, #for genes 30
reassignThreshold = 0,
deepSplit = 3,
mergeCutHeight = 0.25,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file SL_Gill_16S_Summer_TOM-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 212 genes from module 1 because their KME is too low.
## ..removing 82 genes from module 2 because their KME is too low.
## ..removing 2 genes from module 3 because their KME is too low.
## ..removing 2 genes from module 4 because their KME is too low.
## ..removing 5 genes from module 5 because their KME is too low.
## ..removing 2 genes from module 6 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
# power = st,
# networkType = "signed",
# TOMType = "signed",
# corType = "bicor",
# maxPOutliers = 0.05,
# deepSplit = 4, # Default 2
# minModuleSize = 2, # 30
# minCoreKME = 0.5, # Default 0.5
# minCoreKMESize = 2, # Default minModuleSize/3,
# minKMEtoStay = 0.5, # Default 0.3
# reassignThreshold = 0, # Default 1e-6
# mergeCutHeight = 0.4, # Default 0.15
# pamStage = FALSE,
# pamRespectsDendro = TRUE,
# replaceMissingAdjacencies = TRUE,
# numericLabels = TRUE,
# saveTOMs = TRUE,
# saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
# verbose = 3)
###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs
geneTree = network$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree,
file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "networkConstruction-auto", Date, sep="_"), ".RData", sep=""))))
SSU_Gill_WGCNA <- list("SSU_Gill_omics_data" = omics_data,
"SSU_Gill_datTraits"=datTraits,
"SSU_Gill_MEs"=MEs,
"SSU_Gill_moduleLabels"=moduleLabels,
"SSU_Gill_moduleColors"=moduleColors,
"SSU_Gill_geneTree"=geneTree)
saveRDS(SSU_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List", Date, sep="_"), ".rds", sep=""))))
#####################
#plotDendroAndColors#
#####################
##############################
#Number of ASV in each module#
##############################
as.data.frame(table(SSU_Gill_WGCNA$SSU_Gill_moduleLabels)) %>%
dplyr::rename(Module = Var1, Size = Freq) %>%
dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>%
ggplot(aes(x = Module, y = Size, fill = Module)) +
geom_col(color = "#000000") +
ggtitle("Number of genes in each module") +
theme(legend.position = "none") +
scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
ylim(0, max(module_size$Size)*1.1) +
theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
coord_flip() -> ASVModuleSizePlot
ASVModuleSizePlot
ggsave(ASVModuleSizePlot, filename = paste(paste(save_name_16S, "ASVModulePlot" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]
MEs_R_noME0[upper.tri(MEs_R_noME0)] %>%
as.data.frame() %>%
dplyr::rename("correlation" = ".") %>%
ggplot(aes(x=correlation)) +
geom_histogram(bins = 20) +
#geom_density() +
xlim(-1, 1) +
ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
#MEs_R_density
MEs_R_density
#MEs_R_density
pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
silent = T,
breaks = seq(-1,1,length.out = 101),
treeheight_row = 5,
treeheight_col = 5,
main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
labels_row = paste0(prefix, rownames(MEs_R)),
labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr
ggsave(MEs_R_Corr, filename = paste(paste(save_name_16S, "MEs_R_Corr" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen
density_eigen
#######################
#Network-Visualization#
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#############################
# Correlation within modules# from Strand et al, 2021
#############################
corr_within_module <- function(omics_data, network, module_x = 1){
idx.omics_data <- which(network$colors == module_x)
idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
h <- as.numeric(sub("ME","", m))
data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>%
ggplot() +
geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) +
xlim(-1, 1) +
xlab("ASV correlation")+
ggtitle(paste0(prefix,m)) -> da_plot
ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]
cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot
ggsave(density_all_plot, filename = paste(paste(save_name_16S, "WithinModuleCorrelation" , sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 12)
##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, ASVModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,0.5)) -> part_2
cowplot::plot_grid(part_2, density_all_plot, labels = c("", "F"), rel_widths = c(1,0.1), ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_16S, "SSU_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12,
height = 10)
###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(SSU_Gill_WGCNA$SSU_Gill_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .) %>% paste("SSU",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs)
mME = mat %>%
pivot_longer(-traits) %>%
mutate(#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
pivot_longer(-traits) %>%
mutate(
#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)
textMatrixLong2 <- as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
pivot_longer(-traits) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)
## add gene counts per module
genesmod<- as.data.frame(moduleLabels)
genesmod$genes <- rownames(genesmod)
genesmod$Modules <- paste("SSU",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
ModCount <- as.data.frame(genesmod %>%
dplyr::group_by(Modules) %>%
dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]
HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
geom_tile() +
scale_fill_gradient2(
low = "steelblue1",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name_16S, "ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",
"Salinity",
"SecchiDepth"
)) ~ "Abiotics",
(traits %in% c(
"FultonK",
"Length",
"Weight",
"StomachContent",
"HSI",
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
geom_tile() +
scale_fill_gradientn(
colours = c("steelblue1", "white", "red"),
limit = c(-1,1),
values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
WGCNA_ModuleHeatmap <- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(WGCNA_ModuleHeatmap)
ggsave(WGCNA_ModuleHeatmap, filename = paste(paste(save_name_16S, "ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 7, height = 6)
cowplot::plot_grid(WGCNA_ModuleHeatmap, AbioticsLinePlot, labels = c("A", "B"), rel_widths = c(1,0.5), ncol = 2) -> part_4
ggsave(part_4, filename = paste(paste(save_name_16S, "SSU_HmapAbiotics", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 7)
##################
#Specific Modules#
##################
# Module <- "darkturquoise"
# HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
# geom_tile() +
# scale_fill_gradientn(
# colours = c("steelblue1", "white", "red"),
# limit = c(-1,1),
# values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
# # scale_fill_gradient2(
# # low = "steelblue1",
# # high = "red",
# # mid = "white",
# # midpoint = 0,
# # limit = c(-1,1))
# #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
# theme(axis.text.x = element_text(angle=90)) +
# labs(title = "", y = "Modules", fill="corr") +
# geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2),
# size=3, colour="grey20") +
# geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2),
# colour="grey20", size=3) +
# theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
# theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
# prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
# A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
# plot(A)
# ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots,
# device='png', dpi=300, width = 2.8,height = 7)
#############################################
#Create Bacterial Counts and Reseq Dataframe#
#############################################
TAX <- as.data.frame(tax_table(pslist$ps_SL_21))
OTU <- as.data.frame(t(otu_table(pslist$ps_SL_21)))
OTU$ASV <- rownames(OTU)
REFSEQ <- refseq(pslist$ps_SL_21)
REFSEQ <- stack(as.character(REFSEQ, use.names=TRUE))
colnames(REFSEQ)[colnames(REFSEQ) == "ind"] <- "ASV"
SSUData <- TAX %>% rownames_to_column("RowName") %>%
left_join(OTU %>% rownames_to_column("RowName"), by = "RowName") %>%
column_to_rownames("RowName")
SSUData <- left_join(SSUData , REFSEQ)
rownames(SSUData) <- SSUData$ASV
#Count species per Sample
#ddply(SSUData ,.(ASV),numcolwise(sum))
############################################################################
#Create WGCNA Dataframe: Species, ModuleMembership, Correlation to Abiotics#
############################################################################
head(SSU_Gill_WGCNA$SSU_Gill_MEs)
## ME4 ME1 ME10 ME9 ME2
## SLSU21BBEB7 0.12148982 0.008285158 -0.03329939 -0.1959358 -0.15397248
## SLSU21MGEB7 0.05778488 0.361649593 0.36861328 -0.2785649 -0.22050189
## SLSU21MLEB9 -0.41317559 -0.259446826 -0.18140169 -0.2669234 0.37503141
## SLSU21SSEB9 -0.15255770 -0.180701201 -0.30363879 -0.2616119 -0.18207038
## SLSU21BBEB1 0.27275518 0.080016048 0.05260033 -0.1937149 -0.06962721
## SLSU21BBEB2 0.11108720 0.050941806 0.29121502 -0.2333171 -0.16882699
## ME3 ME6 ME11 ME12 ME8
## SLSU21BBEB7 -0.1102952 0.26924243 0.0857673 -0.248408637 -0.08334826
## SLSU21MGEB7 -0.2788859 0.42896168 0.3807363 -0.268125703 -0.30449636
## SLSU21MLEB9 0.3155212 0.31700626 0.2737803 -0.001790919 0.03665652
## SLSU21SSEB9 0.1438393 0.47185604 0.3614075 0.112479044 0.41489712
## SLSU21BBEB1 -0.1539043 -0.21238201 0.2393287 -0.136719927 -0.08523905
## SLSU21BBEB2 -0.1837196 -0.08721782 -0.1664743 0.205956955 0.27067429
## ME5 ME7 ME0
## SLSU21BBEB7 0.20722219 0.007386758 -0.24211297
## SLSU21MGEB7 0.06631932 -0.051195278 -0.18526957
## SLSU21MLEB9 0.09759643 0.090743867 -0.11656118
## SLSU21SSEB9 0.35840553 0.254344995 -0.47273731
## SLSU21BBEB1 -0.15235473 -0.112350624 -0.01538266
## SLSU21BBEB2 0.18114820 0.066572246 -0.13896766
head(SSU_Gill_WGCNA$SSU_Gill_moduleLabels)
## ASV1:Elizabethkingia ASV2:Citrobacter ASV3:Enterobacteriaceae
## 4 4 4
## ASV4:Enterobacteriaceae ASV5:Enterobacter ASV6:Enterobacteriaceae
## 4 4 4
MEs <- SSU_Gill_WGCNA$SSU_Gill_MEs
omics_data <- SSU_Gill_WGCNA$SSU_Gill_omics_data
datTraits <- SSU_Gill_WGCNA$SSU_Gill_datTraits
moduleLabels <- SSU_Gill_WGCNA$SSU_Gill_moduleLabels
modNames <- names(MEs)
nSamples <- nrow(omics_data)
TraitOfInterest <- "O2"
DatTraitOfInterest <- as.data.frame(datTraits[TraitOfInterest])
SSUWGCNAlist <- list()
for (i in names(MEs)) {
a <- length(SSUWGCNAlist)
ModuleOfInterst <- paste(sub("ME", "", i))
geneModuleMembership = as.data.frame(cor(omics_data, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");
geneTraitSignificance = as.data.frame(cor(omics_data, DatTraitOfInterest, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));
names(geneTraitSignificance) = paste("GS.", names(DatTraitOfInterest), sep="");
names(GSPvalue) = paste("p.GS.", names(DatTraitOfInterest), sep="");
OmicsPerModule <- as.data.frame(names(moduleLabels[moduleLabels == ModuleOfInterst]))
names(OmicsPerModule) <- paste("ME", ModuleOfInterst, sep="")
rownames(OmicsPerModule) <- OmicsPerModule[,1]
SSUWGCNAdata <- OmicsPerModule %>% rownames_to_column("RowName") %>%
left_join(geneModuleMembership[rownames(OmicsPerModule),, drop = FALSE][names(geneModuleMembership) %in%
paste("MMME", ModuleOfInterst, sep="")] %>% rownames_to_column("RowName"), by = "RowName") %>%
column_to_rownames("RowName")
SSUWGCNAdata <- SSUWGCNAdata %>% rownames_to_column("RowName") %>%
left_join(geneTraitSignificance[rownames(OmicsPerModule),, drop = FALSE][names(geneTraitSignificance) %in%
paste("GS.", TraitOfInterest, sep="")] %>% rownames_to_column("RowName"), by = "RowName") %>%
column_to_rownames("RowName")
SSUWGCNAdata <- SSUWGCNAdata %>% rownames_to_column("RowName") %>%
left_join(SSUData[rownames(SSUData) %in% rownames(SSUWGCNAdata),] %>% rownames_to_column("RowName"),
by = "RowName") %>% column_to_rownames("RowName")
#Reoder by kME/Module Membership
SSUWGCNAdata <- SSUWGCNAdata %>%
dplyr::arrange(desc(SSUWGCNAdata[paste("MMME", ModuleOfInterst, sep="")]))
SSUWGCNAlist[[a+1]] <- SSUWGCNAdata
names(SSUWGCNAlist)[[a+1]] <- paste("SSU",ModuleOfInterst, sep="")
write.csv2(SSUWGCNAdata, paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S,
paste("SSU",ModuleOfInterst, sep=""), Date, sep="_"),".csv", sep=""))))
}
SSUWGCNAlist$All <- SSUData
saveRDS(SSUWGCNAlist, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List_2", Date, sep="_"), ".rds", sep=""))))
#Export ASVs as Fasta
# pslist$ps_SL %>%
# refseq() %>%
# Biostrings::writeXStringSet(paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23.asv.fna")), append=FALSE,
# compress=FALSE, compression_level=NA, format="fasta")
# fasta_sequences <- Biostrings::readDNAStringSet(paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23.asv.fna")))
# target_names <- SSUWGCNAlist$SSU8$ASV
# # Subset the sequences based on the target names
# subset_sequences <- fasta_sequences[names(fasta_sequences) %in% target_names]
# Biostrings::writeXStringSet(subset_sequences, paste0(file.path(path_Output_WGCNA, "SSUWGCNAlist_SL_21.08.23_SSU8.asv.fna")))
#######################################
#Extract and Inspect Sequences on NCBI#
#######################################
# minTotRelAbun = 0.001
# x = taxa_sums(pslist$ps_Mock)
# keepTaxa = which((x / sum(x)) > minTotRelAbun)
# prunedSet = prune_taxa(as.character(keepTaxa), pslist$ps_Mock)
#
# prunedSet %>%
# refseq() %>%
# Biostrings::writeXStringSet("~/asv.fna", append=FALSE,
# compress=FALSE, compression_level=NA, format="fasta")
SSU_Gill_WGCNA <-readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_16S, "SSU_List", Date, sep="_"), ".rds", sep=""))))
SSU_Gill_WGCNA_list <- list("SSU_Gill_WGCNA" = SSU_Gill_WGCNA)
#################################
#Cluster-Heatmap of Module Genes# in a loop for correlation higher 3
#################################
ExpressionSet <- as.data.frame(SSU_Gill_WGCNA$SSU_Gill_omics_data)
moduleLabels <- SSU_Gill_WGCNA$SSU_Gill_moduleLabels
InterestingComparison <- as.data.frame(names(SSU_Gill_WGCNA$SSU_Gill_MEs))
for (i in names(SSU_Gill_WGCNA_list)[grepl("SSU_Gill_WGCNA", names(SSU_Gill_WGCNA_list))]) {
#https://github.com/kevinblighe/E-MTAB-6141
tryCatch({
g <- paste(i)
gg <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i); print(gg)
for (MODULE in unique(as.character(InterestingComparison$name))) {
tryCatch({
genes_of_interest <- names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])
print(MODULE)
#print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))
FILENAME <- paste(paste(save_name_16S, "Heatplot", sub("ME", "SSU", MODULE), sep="_"),".png", sep="")
FILENAME2 <- paste(paste(save_name_16S, "Heatplot", sub("ME", "SSU", MODULE), "NoClust",sep="_"),".png", sep="")
TITLE <- draw_label_themeRKwhite(paste(Species, gg, Type, MODULE),
element = "plot.title", x = 0.05, hjust = 0, vjust = 1)
#For any unknown reason gene names like trnat-ugu_39 get changes by WGCNA to trnat.ugu_39
rowclusternum <- 1
columnclusternum <- 1
require(DESeq2)
require(tidyverse)
require(ggplot2)
BacteriaHeatPlotRKnoClust_SL(res = res, clr = ExpressionSet, Species = Species, min_count = 10,
genes_of_interest = genes_of_interest, Samples = Samples_SSU_Gill,
SAMDF = SAMDF_16S, TITLE = TITLE, filename= FILENAME, OutlineColor = "grey20")
if (MODULE == "ME2") {
plot(A)
} else {
print("Plots are saved to the pathPlots")
}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})}
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "Gill"
## [1] "ME4"
## [1] "Plots are saved to the pathPlots"
## [1] "ME1"
## [1] "Plots are saved to the pathPlots"
## [1] "ME10"
## [1] "Plots are saved to the pathPlots"
## [1] "ME9"
## [1] "Plots are saved to the pathPlots"
## [1] "ME2"
## [1] "ME3"
## [1] "Plots are saved to the pathPlots"
## [1] "ME6"
## [1] "Plots are saved to the pathPlots"
## [1] "ME11"
## [1] "Plots are saved to the pathPlots"
## [1] "ME12"
## [1] "Plots are saved to the pathPlots"
## [1] "ME8"
## [1] "Plots are saved to the pathPlots"
## [1] "ME5"
## [1] "Plots are saved to the pathPlots"
## [1] "ME7"
## [1] "Plots are saved to the pathPlots"
## [1] "ME0"
## [1] "Plots are saved to the pathPlots"
#-
Here I did a Picrust2 analysis but it was not very informative, I guess there are just to many unknown species in Fish gills
#Austin, B., Austin, D. A., & Munn, C. B. (2007). Bacterial fish pathogens: disease of farmed and wild fish (Vol. 26, p. 552). Chichester: Springer.
PathogeneSpecies <-
c("Clostridium botulinum",
"Eubacterium tarantellae",
"Carnobacterium maltaromaticum",
"Enterococcus faecalis",
"Vagococcus salmoninarum",
"Lactobacillus" ,
"Lactococcus garvieae",
"Enterococcus seriolicida",
"Lactococcus piscium",
"Streptococcus dysgalactiae",
"Streptococcus agalactiae",
"Streptococcus ictaluri",
"Streptococcus parauberis",
"Streptococcus phocae",
"Renibacterium salmoninarum",
"Erysipelothrix rhusiopathiae",
"Bacillus",
"Bacillus cereus",
"Bacillus mycoides",
"Corynebacterium aquaticum",
"Weissella",
"Weissella ceti",
"Micrococcus",
"Mycobacterium",
"Mycobacterium abscessus",
"Mycobacterium anabanti",
"Mycobacterium avium",
"Mycobacterium chelonei",
"Mycobacterium fortuitum",
"Mycobacterium gordonae",
"Mycobacterium marinum",
"Mycobacterium montefiorense",
"Mycobacterium neoaurum",
"Mycobacterium piscium",
"Mycobacterium platypoecilus",
"Mycobacterium poriferae",
"Mycobacterium pseudoshottsii",
"Mycobacterium ranae",
"Mycobacterium salmoniphilum",
"Mycobacterium shottsii",
"Mycobacterium scrofulaceum",
"Mycobacterium simiae",
"Mycobacterium smegmatis",
"Mycobacterium ulcerans",
"Nocardia asteroides",
"Nocardia salmonicida",
"Nocardia seriolae",
"Rhodococcus",
"Rhodococcus erythropolis",
"Rhodococcus qingshengii",
"Planococcus",
"Staphylococcus epidermidis",
"Staphylococcus warneri",
"Aeromonas allosaccharophila",
"Aeromonas bestiarum",
"Aeromonas caviae",
"Aeromonas dhakensis",
"Aeromonas hydrophila",
"Aeromonas liquefaciens",
"Aeromonas punctata",
"Aeromonas jandaei",
"Aeromonas piscicola",
"Aeromonas salmonicida",
"Aeromonas sobria",
"Aeromonas schubertii",
"Aeromonas veronii",
"Pseudoalteromonas piscicida",
"Pseudoalteromonas undina",
"Shewanella putrefaciens",
"Arcobacter cryaerophilus",
"Delftia acidovorans",
"Citrobacter freundii",
"Edwardsiella anguillarum",
"Edwardsiella ictaluri",
"Edwarsiella piscicida",
"Edwardsiella tarda",
"Enterobacter cloacae",
"Plesiomonas shigelloides",
"Pantoea",
"Enterobacter agglomerans",
"Providencia rettgeri",
"Proteus rettgeri",
"Providencia vermicola",
"Salmonella enterica",
"Serratia liquefaciens",
"Serratia plymuthica",
"Yersinia ruckeri",
"Chryseobacterium aahli",
"Chryseobacterium balustinum",
"Flavobacterium balustinum",
"Chryseobacterium indologenes",
"Chryseobacterium scophthalmum",
"Chryseobacterium piscicola",
"Flavobacterium scophthalmum",
"Flavobacterium branchiophilum",
"Flavobacterium columnare",
"Flexibacter columnaris",
"Cytophaga columnaris" ,
"Flavobacterium hydatis",
"Cytophaga aquatilis",
"Flavobacterium johnsoniae" ,
"Cytophaga johnsonae",
"Flavobacterium oncorhynchi" ,
"Flavobacterium psychrophilum",
"Cytophaga psychrophile",
"Flavobacterium spartensis" ,
"Flavobacterium succinicans" ,
"Flectobacillus roseus" ,
"Tenacibaculum dicentrarchi",
"Tenacibaculum discolor" ,
"Tenacibaculum gallaicum",
"Tenacibaculum maritimum",
"Flexibacter maritimus",
"Tenacibaculum ovolyticum" ,
"Flexibacter ovolyticus",
"Tenacibaculum soleae" ,
"Francisella noatunensis" ,
"Francisella philomiragia",
"Francisella piscicida",
"Hahella chejuensis" ,
"Halomonas cupida",
"Deleya cupida" ,
"Acinetobacter johnsonii" ,
"Acinetobacter iwoffi" ,
"Moritella marina" ,
"Moritella viscosa" ,
"Mycoplasma" ,
"Myxococcus piscicola" ,
"Aquaspirillum",
"Janthinobacterium lividum ",
"Pasteurella skyensis" ,
"Piscirickettsia salmonis ",
"Pseudomonas aeruginosa ",
"Pseudomonas alcaligenes" ,
"Pseudomonas baetica" ,
"Pseudomonas chlororaphis ",
"Pseudomonas fluorescens" ,
"Pseudomonas koreensis" ,
"Pseudomonas mosselii" ,
"Pseudomonas plecoglossicida" ,
"Pseudomonas pseudoalcaligenes" ,
"Pseudomonas putida" ,
"Photobacterium damselae",
"Pasteurella piscicida",
"Aliivibrio fischeri" ,
"Aliivibrio logei" ,
"Aliivibrio salmonicida" ,
"Aliivibrio wodanis" ,
"Vibrio aestuarianus" ,
"Vibrio alginolyticus" ,
"Vibrio anguillarum" ,
"Listonella anguillarum",
"Vibrio furnissii",
"Vibrio harveyi",
"Vibrio carchariae" ,
"Vibrio trachuri" ,
"Vibrio ichthyoenteri",
"Vibrio mimicus" ,
"Vibrio ordalii",
"Vibrio parahaemolyticus",
"Vibrio ponticus",
"Vibrio scophthalmi" ,
"Vibrio splendidus" ,
"Vibrio tapetis" ,
"Vibrio vulnifi" ,
"Candidatus ctinochlamydia clariae",
"Candidatus Arthromitus",
"Candidatus Branchiomonas cisticola",
"Candidatus Clavochlamydia salmonicola",
"Candidatus Piscichlamydia salmonis",
"Candidatus Renichlamydia lutjanid",
"Candidatus Similichlamydia latridicola",
"Candidatus Syngnamydia Venezia",
"Streptobacillus" ,
"Varracalbmi",
"Candidatus Megaira") #Added by me
PathogeneGenus <-c(
"Clostridium",
"Eubacterium",
"Carnobacterium",
"Enterococcus",
"Vagococcus",
"Lactobacillus" ,
"Lactococcus",
"Enterococcus",
"Streptococcus",
"Renibacterium",
"Erysipelothrix",
"Bacillus",
"Corynebacterium",
"Weissella",
"Micrococcus",
"Mycobacterium",
"Nocardia",
"Rhodococcus",
"Planococcus",
"Staphylococcus",
"Aeromonas",
"Pseudoalteromonas",
"Shewanella",
"Arcobacter",
"Delftia",
"Citrobacter",
"Edwardsiella",
"Enterobacter",
"Plesiomonas",
"Pantoea",
"Enterobacter",
"Providencia",
"Salmonella",
"Serratia",
"Yersinia",
"Chryseobacterium",
"Flavobacterium",
"Flexibacter",
"Cytophaga",
"Flectobacillus" ,
"Tenacibaculum",
"Francisella" ,
"Hahella",
"Halomonas",
"Deleya" ,
"Acinetobacter" ,
"Moritella" ,
"Mycoplasma" ,
"Myxococcus" ,
"Aquaspirillum",
"Janthinobacterium",
"Pasteurella" ,
"Piscirickettsia",
"Pseudomonas",
"Photobacterium",
"Pasteurella",
"Aliivibrio" ,
"Vibrio" ,
"Listonella",
"Candidatus",
"Streptobacillus" ,
"Varracalbmi",
"Candidatus Megaira",
"Candidatus ctinochlamydia",
"Candidatus Arthromitus",
"Candidatus Branchiomonas",
"Candidatus Clavochlamydia",
"Candidatus Piscichlamydia",
"Candidatus Renichlamydia",
"Candidatus Similichlamydia",
"Candidatus Syngnamydia")
for (i in names(pslist)[grepl("ps_SLWF_21", names(pslist))]){
require(tidyverse)
require(phyloseq)
require(cowplot)
tryCatch({
g <- paste(i); print(g)
gg <- sub('ps_', '', g)
FILENAME <- paste(paste(save_name_16S, "Pathogenes_Genus_From_Literature", sep="_"),".png", sep="")
sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
Aps_alpha_barplot <- pslist[[i]] %>%
#tax_glom(taxrank = "Genus") %>% #merges species that have the same taxonomy at a certain rank
transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
psmelt() %>% # Melt to long format
#filter(Abundance > 0.05) %>% # Filter out low abundance taxa
arrange(Genus) # Sort data frame alphabetically by phylum
Aps_alpha_barplot$Reps <- sub(Species, "", Aps_alpha_barplot$Replicate2)
Aps_alpha_barplot$ASV <- sub(".*:", "", Aps_alpha_barplot$OTU)
Aps_alpha_barplot$ASV <- sub('\\.', ' ', Aps_alpha_barplot$ASV)
#print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% PathogeneGenus,] #PathogeneSpecies
#print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
#https://gist.github.com/svigneau/05148a7031172c2bc70d
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))
Aps_alpha_barplot_Pathogens$Labels <- labels
print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 5))
df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance,
decreasing = TRUE), ]
top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
LocOrderSL=c(
"Medem Grund","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch")
microbiomeseq_cols <- function(){
colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
"#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
"#FFFF00",
grey.colors(1000));return(colours)}
colours <- microbiomeseq_cols()
RepOrder <-c(
"WF","SU21MG","SU21BB", "SU21SS", "SU21ML")
# if (gg == "GC") {
# RepOrder <- GCOrder
# } else if (gg == "OE") {
# RepOrder <- OEOrder
# } else if (gg == "SL") {
# RepOrder <- SLOrder }
df <- Aps_alpha_barplot_Pathogens
p <-ggplot(df,
aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
#scale_x_discrete(labels = LocOrder) +
atheme2 +
ylab("Relative Abundance [%] \n") + xlab("Sample") +
#scale_color_manual(values= col.Palette.species2)+
#scale_fill_manual(values= col.Palette.species2)
ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x=element_blank(),
#axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()
#axis.title.x = element_blank()
) +
theme(
panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
#panel.grid.major = element_blank(), #remove major gridlines
#panel.grid.minor = element_blank(), #remove minor gridlines
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent') #transparent legend panel
) +
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(col.Palette[[Species]], 0.7)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
height = 8)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ps_SLWF_21"
## Labels ReadNum SampleID OTU
## 1 16681 16681 SLSU21BBEB1 ASV132:Acinetobacter
## 2 16681 SLSU21BBEB1 ASV14296:Acinetobacter
## 3 16681 SLSU21BBEB1 ASV159:Acinetobacter
## 4 16681 SLSU21BBEB1 ASV1630:Acinetobacter
## 5 16681 SLSU21BBEB1 ASV1799:Acinetobacter
GroupOfInterest <- c("ASV13:Acinetobacter.lwoffii",
"ASV24:Acinetobacter",
"ASV25:Polynucleobacter",
"ASV40:Acinetobacter.johnsonii",
"ASV50:Candidatus Megaira",
"ASV213:Aeromonas",
"ASV218:Shewanella.putrefaciens",
"ASV1670:Pseudomonas.putida",
"ASV1718:Shewanella",
"ASV2290:Chryseobacterium.haifense")
for (i in names(pslist)[grepl("ps_WGCNA", names(pslist))]){
require(tidyverse)
require(phyloseq)
require(cowplot)
require(plyr)
tryCatch({
g <- paste(i); print(g)
gg <- sub('ps_', '', g)
FILENAME <- paste(paste(save_name_16S, "Pathogenic_ASVs_Genus_Level", sep="_"),".png", sep="")
sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
Aps_alpha_barplot <- pslist[[i]] %>%
#tax_glom(taxrank = "Genus") %>% #merges species that have the same taxonomy at a certain rank
transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
psmelt() #%>% # Melt to long format
#filter(Abundance > 0.05) %>% # Filter out low abundance taxa
#arrange(Species) # Sort data frame alphabetically by phylum
Aps_alpha_barplot$Reps <- sub(gg, "", Aps_alpha_barplot$Replicates)
Aps_alpha_barplot$ASV <- sub(".*:", "", Aps_alpha_barplot$OTU)
Aps_alpha_barplot$ASV <- sub('\\.', ' ', Aps_alpha_barplot$ASV)
#print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)
GroupOfInterestSpecies <- sub(".*:", "", GroupOfInterest)
GroupOfInterestSpecies <-sub('\\.', ' ', GroupOfInterestSpecies)
######################
#PathogeneSpecies ASV#
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% GroupOfInterestSpecies,]
################
#For Genus glom#
#Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$Genus %in% GroupOfInterestSpecies,]
#print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
#https://gist.github.com/svigneau/05148a7031172c2bc70d
SLOrder <-c(
"SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
if (gg == "GC") {
RepOrder <- GCOrder
} else if (gg == "OE") {
RepOrder <- OEOrder
} else if (gg == "SL") {
RepOrder <- SLOrder }
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))
Aps_alpha_barplot_Pathogens$Labels <- labels
print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 100))
df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance,
decreasing = TRUE), ]
top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
LocOrderSL=c(
"Medem Grund","Oste","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch")
microbiomeseq_cols <- function(){
colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
"#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
"#FFFF00",
grey.colors(1000));return(colours)}
colours <- microbiomeseq_cols()
RepOrder <- SLOrder
df <- Aps_alpha_barplot_Pathogens
p<-ggplot(df,
aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
#scale_x_discrete(labels = LocOrder) +
atheme2 +
ylab("Relative Abundance [%] \n") + xlab("Sample") +
#scale_color_manual(values= col.Palette.species2)+
#scale_fill_manual(values= col.Palette.species2)
ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()) +
theme(panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent')) + #transparent legend panel
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(col.Palette[[Species]], 0.7)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
height = 8)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ps_WGCNA"
## Labels ReadNum SampleID OTU
## 1 16628 16628 SLSU21BBEB1 ASV227:Aeromonas
## 2 16628 SLSU21BBEB1 ASV211:Aeromonas
## 3 16628 SLSU21BBEB1 ASV330:Aeromonas
## 4 16628 SLSU21BBEB1 ASV520:Candidatus Megaira
## 5 16628 SLSU21BBEB1 ASV13:Acinetobacter.lwoffii
## 6 16628 SLSU21BBEB1 ASV130:Polynucleobacter
## 7 16628 SLSU21BBEB1 ASV10179:Shewanella
## 8 16628 SLSU21BBEB1 ASV10460:Candidatus Megaira
## 9 16628 SLSU21BBEB1 ASV10679:Polynucleobacter
## 10 16628 SLSU21BBEB1 ASV1123:Polynucleobacter
## 11 16628 SLSU21BBEB1 ASV1140:Candidatus Megaira
## 12 16628 SLSU21BBEB1 ASV1150:Aeromonas
## 13 16628 SLSU21BBEB1 ASV1240:Shewanella
## 14 16628 SLSU21BBEB1 ASV12665:Polynucleobacter
## 15 16628 SLSU21BBEB1 ASV132:Acinetobacter
## 16 16628 SLSU21BBEB1 ASV14296:Acinetobacter
## 17 16628 SLSU21BBEB1 ASV146:Shewanella
## 18 16628 SLSU21BBEB1 ASV1490:Shewanella
## 19 16628 SLSU21BBEB1 ASV1502:Polynucleobacter
## 20 16628 SLSU21BBEB1 ASV158:Aeromonas
## 21 16628 SLSU21BBEB1 ASV159:Acinetobacter
## 22 16628 SLSU21BBEB1 ASV1630:Acinetobacter
## 23 16628 SLSU21BBEB1 ASV1632:Shewanella
## 24 16628 SLSU21BBEB1 ASV1693:Pseudomonas.putida
## 25 16628 SLSU21BBEB1 ASV1734:Shewanella
## 26 16628 SLSU21BBEB1 ASV1799:Acinetobacter
## 27 16628 SLSU21BBEB1 ASV180:Polynucleobacter
## 28 16628 SLSU21BBEB1 ASV1841:Acinetobacter
## 29 16628 SLSU21BBEB1 ASV1843:Acinetobacter
## 30 16628 SLSU21BBEB1 ASV1948:Acinetobacter
## 31 16628 SLSU21BBEB1 ASV195:Acinetobacter.johnsonii
## 32 16628 SLSU21BBEB1 ASV2142:Shewanella
## 33 16628 SLSU21BBEB1 ASV2175:Acinetobacter
## 34 16628 SLSU21BBEB1 ASV218:Shewanella.putrefaciens
## 35 16628 SLSU21BBEB1 ASV221:Shewanella.putrefaciens
## 36 16628 SLSU21BBEB1 ASV2262:Candidatus Megaira
## 37 16628 SLSU21BBEB1 ASV2329:Chryseobacterium.haifense
## 38 16628 SLSU21BBEB1 ASV24:Acinetobacter
## 39 16628 SLSU21BBEB1 ASV25:Polynucleobacter
## 40 16628 SLSU21BBEB1 ASV259:Acinetobacter.lwoffii
## 41 16628 SLSU21BBEB1 ASV3013:Shewanella
## 42 16628 SLSU21BBEB1 ASV311:Shewanella
## 43 16628 SLSU21BBEB1 ASV32:Polynucleobacter
## 44 16628 SLSU21BBEB1 ASV3268:Shewanella
## 45 16628 SLSU21BBEB1 ASV331:Acinetobacter
## 46 16628 SLSU21BBEB1 ASV3328:Aeromonas
## 47 16628 SLSU21BBEB1 ASV382:Acinetobacter
## 48 16628 SLSU21BBEB1 ASV3932:Polynucleobacter
## 49 16628 SLSU21BBEB1 ASV40:Acinetobacter.johnsonii
## 50 16628 SLSU21BBEB1 ASV413:Shewanella
## 51 16628 SLSU21BBEB1 ASV4141:Acinetobacter
## 52 16628 SLSU21BBEB1 ASV415:Shewanella
## 53 16628 SLSU21BBEB1 ASV4290:Polynucleobacter
## 54 16628 SLSU21BBEB1 ASV438:Acinetobacter
## 55 16628 SLSU21BBEB1 ASV485:Aeromonas
## 56 16628 SLSU21BBEB1 ASV4863:Acinetobacter
## 57 16628 SLSU21BBEB1 ASV4873:Shewanella
## 58 16628 SLSU21BBEB1 ASV49:Candidatus Megaira
## 59 16628 SLSU21BBEB1 ASV5011:Shewanella
## 60 16628 SLSU21BBEB1 ASV5095:Shewanella
## 61 16628 SLSU21BBEB1 ASV579:Shewanella
## 62 16628 SLSU21BBEB1 ASV6047:Acinetobacter
## 63 16628 SLSU21BBEB1 ASV611:Polynucleobacter
## 64 16628 SLSU21BBEB1 ASV6554:Polynucleobacter
## 65 16628 SLSU21BBEB1 ASV666:Acinetobacter.johnsonii
## 66 16628 SLSU21BBEB1 ASV6742:Candidatus Megaira
## 67 16628 SLSU21BBEB1 ASV708:Shewanella
## 68 16628 SLSU21BBEB1 ASV768:Acinetobacter
## 69 16628 SLSU21BBEB1 ASV770:Acinetobacter
## 70 16628 SLSU21BBEB1 ASV7890:Shewanella
## 71 16628 SLSU21BBEB1 ASV798:Acinetobacter
## 72 16628 SLSU21BBEB1 ASV806:Aeromonas
## 73 16628 SLSU21BBEB1 ASV837:Shewanella
## 74 16628 SLSU21BBEB1 ASV846:Aeromonas
## 75 16628 SLSU21BBEB1 ASV939:Acinetobacter.johnsonii
## 76 16628 SLSU21BBEB1 ASV95:Acinetobacter
## 77 16628 SLSU21BBEB1 ASV956:Polynucleobacter
## 78 16628 SLSU21BBEB1 ASV964:Polynucleobacter
## 79 16628 SLSU21BBEB1 ASV977:Acinetobacter.johnsonii
## 80 16628 SLSU21BBEB1 ASV980:Acinetobacter
## 81 16628 SLSU21BBEB1 ASV981:Aeromonas
## 82 16628 SLSU21BBEB1 ASV99:Candidatus Megaira
## 83 23860 23860 SLSU21BBEB2 ASV211:Aeromonas
## 84 23860 SLSU21BBEB2 ASV13:Acinetobacter.lwoffii
## 85 23860 SLSU21BBEB2 ASV520:Candidatus Megaira
## 86 23860 SLSU21BBEB2 ASV158:Aeromonas
## 87 23860 SLSU21BBEB2 ASV227:Aeromonas
## 88 23860 SLSU21BBEB2 ASV382:Acinetobacter
## 89 23860 SLSU21BBEB2 ASV159:Acinetobacter
## 90 23860 SLSU21BBEB2 ASV330:Aeromonas
## 91 23860 SLSU21BBEB2 ASV40:Acinetobacter.johnsonii
## 92 23860 SLSU21BBEB2 ASV1734:Shewanella
## 93 23860 SLSU21BBEB2 ASV24:Acinetobacter
## 94 23860 SLSU21BBEB2 ASV180:Polynucleobacter
## 95 23860 SLSU21BBEB2 ASV25:Polynucleobacter
## 96 23860 SLSU21BBEB2 ASV49:Candidatus Megaira
## 97 23860 SLSU21BBEB2 ASV10179:Shewanella
## 98 23860 SLSU21BBEB2 ASV10460:Candidatus Megaira
## 99 23860 SLSU21BBEB2 ASV10679:Polynucleobacter
## 100 23860 SLSU21BBEB2 ASV1123:Polynucleobacter
GroupOfInterest <- c(
"ASV15:Verticiella",
"ASV1432:Verticiella",
"ASV306:Shewanella.baltica",
"ASV1632:Shewanella",
"ASV708:Shewanella",
"ASV413:Shewanella",
"ASV146:Shewanella",
"ASV485:Aeromonas",
"ASV977:Acinetobacter.johnsonii",
"ASV195:Acinetobacter.johnsonii",
"ASV95:Acinetobacter",
"ASV1105:Pseudomonas",
"ASV221:Shewanella.putrefaciens",
"ASV146:Shewanella",
"ASV846:Aeromonas",
"ASV806:Aeromonas",
"ASV3501:Chryseobacterium.piscicola",
"ASV306:Shewanella.baltica",
"ASV415:Shewanella"
)
for (i in names(pslist)[grepl("ps_WGCNA", names(pslist))]){
require(tidyverse)
require(phyloseq)
require(cowplot)
require(plyr)
tryCatch({
g <- paste(i); print(g)
gg <- sub('ps_', '', g)
FILENAME <- paste(paste(save_name_16S, "Pathogenic_ASVs_Level", sep="_"),".png", sep="")
sample_data(pslist[[i]])$ReadNum <-rowSums(otu_table(pslist[[i]]))
Aps_alpha_barplot <- pslist[[i]] %>%
#tax_glom(taxrank = "Genus") %>% #merges species that have the same taxonomy at a certain rank
transform_sample_counts(function(x) {x/sum(x)*100} ) %>% # Transform to rel. abundance
psmelt() #%>% # Melt to long format
#filter(Abundance > 0.05) %>% # Filter out low abundance taxa
#arrange(Species) # Sort data frame alphabetically by phylum
Aps_alpha_barplot$Reps <- sub(gg, "", Aps_alpha_barplot$Replicates)
Aps_alpha_barplot$ASV <- Aps_alpha_barplot$OTU #sub(".*:", "", Aps_alpha_barplot$OTU)
#Aps_alpha_barplot$ASV <- #sub('\\.', ' ', Aps_alpha_barplot$ASV)
#print(Aps_alpha_barplot[grep("Megaira", Aps_alpha_barplot$ASV),]$ASV)
#GroupOfInterestSpecies <- sub(".*:", "", GroupOfInterest)
#GroupOfInterestSpecies <-sub('\\.', ' ', GroupOfInterestSpecies)
######################
#PathogeneSpecies ASV#
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$ASV %in% GroupOfInterest,]
################
#For Genus glom#
#Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot[Aps_alpha_barplot$Genus %in% GroupOfInterestSpecies,]
#print(Aps_alpha_barplot_Pathogens[grep("Megaira", Aps_alpha_barplot_Pathogens$ASV),]$ASV)
Aps_alpha_barplot_Pathogens<- Aps_alpha_barplot_Pathogens %>% arrange(SampleID)
#https://gist.github.com/svigneau/05148a7031172c2bc70d
RepOrder <-c(
"SLSU21MG","SLSU21BB", "SLSU21SS", "SLSU21ML")
# if (gg == "GC") {
# RepOrder <- GCOrder
# } else if (gg == "OE") {
# RepOrder <- OEOrder
# } else if (gg == "SL") {
# RepOrder <- SLOrder }
labels <- unlist(lapply(as.character(Aps_alpha_barplot_Pathogens[!duplicated(Aps_alpha_barplot_Pathogens[,c("SampleID")]),]$ReadNum), function(x) c(x, rep("", length(unique(Aps_alpha_barplot_Pathogens$OTU))-1))))
Aps_alpha_barplot_Pathogens$Labels <- labels
print(head(Aps_alpha_barplot_Pathogens[, c("Labels", "ReadNum", "SampleID", "OTU")], 5))
df <- ddply(Aps_alpha_barplot_Pathogens, .(OTU), summarise,
OverallAbundance=(sum(Abundance)/length(unique((Aps_alpha_barplot_Pathogens$SampleID)))))
df <- df[order(df$OverallAbundance, decreasing = TRUE), ]
Aps_alpha_barplot_Pathogens <- left_join(Aps_alpha_barplot_Pathogens, df)
Aps_alpha_barplot_Pathogens <- Aps_alpha_barplot_Pathogens[order(Aps_alpha_barplot_Pathogens$OverallAbundance,
decreasing = TRUE), ]
top_20 <- df %>% top_n(20, OverallAbundance) %>% pull(OTU)
LocOrderSL=c(
"Medem Grund","Brunsbuettel","Schwarztonnen Sand","Muehlenberger Loch")
microbiomeseq_cols <- function(){
colours <- c("#F0A3FF", "#0075DC", "#993F00","#4C005C","#2BCE48","#FFCC99","#808080","#94FFB5","#8F7C00","#9DCC00",
"#C20088","#003380","#FFA405","#FFA8BB","#426600","#FF0010","#5EF1F2","#00998F","#740AFF","#990000",
"#FFFF00",
grey.colors(1000));return(colours)}
colours <- microbiomeseq_cols()
RepOrder <- SLOrder
df <- Aps_alpha_barplot_Pathogens
p<-ggplot(df,
aes(x = SampleID, y = Abundance, fill = factor(df$OTU, levels = unique(df$OTU)))) +
geom_bar(stat = "identity") +
facet_grid(. ~ factor(Reps, levels=RepOrder), drop = TRUE, scale = "free", space = "free_x") +
#scale_x_discrete(labels = LocOrder) +
atheme2 +
ylab("Relative Abundance [%] \n") + xlab("Sample") +
#scale_color_manual(values= col.Palette.species2)+
#scale_fill_manual(values= col.Palette.species2)
ggtitle("Pathogenic Species rel. Abundance \nBacterial Communities by Sampling Site, ASVs aggregated at Species Level") +
scale_fill_manual(values = microbiomeseq_cols(), name= "ASVs of top 20", breaks = top_20) +
ggrepel::geom_text_repel(x = df$SampleID, y = df$Abundance, aes(label = Labels),
inherit.aes = FALSE, min.segment.length = 0,nudge_y = 20,size=3, max.overlaps = Inf) +
atheme +
theme(strip.text.y = element_text(angle = 0)) +
theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.title.y.left = element_blank(),
axis.ticks.y =element_blank()) +
theme(panel.background = element_rect(fill='transparent'), #transparent panel bg
plot.background = element_rect(fill='transparent', color=NA), #transparent plot bg
legend.background = element_rect(fill='transparent'), #transparent legend bg
legend.box.background = element_rect(fill='transparent')) + #transparent legend panel
theme(axis.title.x.bottom = element_text(color="grey13"),
strip.text = element_text(color = "black", face= "bold"))
g <- ggplot_gtable(ggplot_build(p))
stripr <- which(grepl('strip-t', g$layout$name))
fills <- alpha(col.Palette[[Species]], 0.7)
k <- 1
for (iii in stripr) {
j <- which(grepl('rect', g$grobs[[iii]]$grobs[[1]]$childrenOrder))
g$grobs[[iii]]$grobs[[1]]$children[[j]]$gp$fill <- fills[k]
k <- k+1}
prow <- cowplot::plot_grid(g, labels = c(""), ncol = 1)
AAA<- plot_grid(prow, ncol = 1, rel_heights = c(100))
ggsave(AAA, filename = FILENAME, path = pathPlots, device='png', dpi=300, width = 15,
height = 8)
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
## [1] "ps_WGCNA"
## Labels ReadNum SampleID OTU
## 1 16628 16628 SLSU21BBEB1 ASV15:Verticiella
## 2 16628 SLSU21BBEB1 ASV1105:Pseudomonas
## 3 16628 SLSU21BBEB1 ASV1432:Verticiella
## 4 16628 SLSU21BBEB1 ASV146:Shewanella
## 5 16628 SLSU21BBEB1 ASV1632:Shewanella
#-
Individual Co-Expression networks
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set
set.seed(123)
Species <- "SL"
Year <- "2021"
Season <- "Summer"
Type <- "RNA"
Tissue <- "Gill"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"
#####################
Tissue <- "Gill"
variable <- "Replicates"
#####################
save_name_RNA_Gill <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Gill, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Gill_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Gill$SampleID, type="cmd"), collapse=", "))
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Gill_All<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Gill<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB4", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
#"SLSU21MGEB5" outlier in sample Clustering
#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill
Samples_RNA_Gill_List <- list("Samples_RNA_Gill_All" = Samples_RNA_Gill_All, "Samples_RNA_Gill"=Samples_RNA_Gill)
WGCNA_RNA_Gill <- list()
for (Gills in names(Samples_RNA_Gill_List)){
WGCNA_RNA_Gill[[Gills]] <- list()
Samples <- Samples_RNA_Gill_List[[Gills]]
SAMDF_RNA_Gill <- SAMDF_RNA_Gill[SAMDF_RNA_Gill$SampleID %in% Samples,]
SAMDF_RNA_Gill <-SAMDF_RNA_Gill %>% arrange(factor(rownames(SAMDF_RNA_Gill), levels = Samples))
rownames(SAMDF_RNA_Gill) <-SAMDF_RNA_Gill$SampleID
#=====================================================================================
# ExperimentalDesign & Filter
#=====================================================================================
cnt_RNA_Gill <- cnt_RNA_Gill[,SAMDF_RNA_Gill$SampleID]
#ExperimentalDesign
library(DESeq2)
dds_RNA_Gill <- DESeqDataSetFromMatrix(countData = cnt_RNA_Gill,
colData = SAMDF_RNA_Gill,
design = ~ Replicates)
vst_RNA_Gill <- vst(dds_RNA_Gill, blind = FALSE)
##################
# SampleDist PCAs#
##################
vst_RNA_list <- list("vst_RNA_Gill" =vst_RNA_Gill)
for (i in names(vst_RNA_list)[grepl("vst", names(vst_RNA_list))]){
require(plyr)
require(ggrepel)
require(cowplot)
if (OperatingSystem == "Mac" ) {
quartz() }
tryCatch({
g <- paste(i)
gg <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
RNA_vst_PCA_Gill <-pcaplotRK3(vst_RNA_list[[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",
ellipse = TRUE, ellipse.prob = 0.5) +
scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
scale_color_manual(values=col.Palette$SL) + atheme +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"))
prow <- cowplot::plot_grid(RNA_vst_PCA_Gill, labels = c(""), ncol = 1)
title <- ggdraw() + draw_label_themeRKwhite(paste(Species, gg, Type, Gills), element = "plot.title",x = 0.05, hjust = 0, vjust = 1)
subtitle <- ggdraw() + draw_label_themeRKwhite(paste("vst-PCA", "with", ... =
length(rownames(vst_RNA_list[[i]])),"genes",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,
vjust = 1)
RNA_vst_PCA_Gill<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
ggsave(RNA_vst_PCA_Gill, filename = paste(paste(save_name_RNA_Gill, "vst-PCA", Gills, sep="_"),".png", sep=""), path = pathPlots ,
device='png', dpi=300, width = 7,
height = 7)
plot(RNA_vst_PCA_Gill)
WGCNA_RNA_Gill[[Gills]]$RNA_vst_PCA_Gill <- RNA_vst_PCA_Gill
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
#######################
### 3.1.2 Clustering###
#######################
########################################
# Get overview of normalized data#
# dds <- DESeq(dds)
# VST <- getVarianceStabilizedData(dds)
# rv_VST <- rowVars(VST)
# summary(rv_VST)
# q75_VST <- quantile( rowVars(VST), .75) # <= original
# expr_normalized <- VST[ rv_VST > q75_VST, ]
#
# expr_normalized_df <- data.frame(expr_normalized) %>%
# mutate(Gene_id = row.names(expr_normalized)) %>% pivot_longer(-Gene_id)
#
# expr_normalized_df %>% ggplot(., aes(x = name, y = value)) +
# geom_violin() +
# geom_point() +
# theme_bw() +
# theme(axis.text.x = element_text( angle = 90)) +
# ylim(0, NA) +
# labs(
# title = "Normalized and 75 quantile Expression",
# x = "treatment",y = "normalized expression")
####################################################
#=====================================================================================
# Code chunk 3
#=====================================================================================
library(WGCNA)
#rlog-Transformation
omics_data0 <-data.frame(t(assay(vst_RNA_Gill)))
a <- length(omics_data0)
gsg = goodSamplesGenes(omics_data0, verbose = 3);
gsg$allOK
#=====================================================================================
# Code chunk 4
#=====================================================================================
if (!gsg$allOK) {
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
print(paste("Removing genes"));
if (sum(!gsg$goodSamples)>0)
print(paste("Removing samples:"));
# Remove the offending genes and samples from the data:
omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
}
b <- length(omics_data0)
print(paste("genes before", a))
print(paste("genes before", b))
print(paste("genes before", b-a))
#=====================================================================================
# Code chunk 5
#=====================================================================================
sampleTree = hclust(dist(omics_data0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
#=====================================================================================
# Code chunk 6
#=====================================================================================
# Plot a line to show the cut
abline(h = 100, col = "red");
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
recordPlot() -> SampleClusteringTreePlot
#in Case we would exclude outliers from the tree:
#keepSamples = (clust==1)
#omics_data0 = omics_data0[keepSamples, ]
#Keep all samples
omics_data = omics_data0
nGenes = ncol(omics_data)
nSamples = nrow(omics_data)
#=====================================================================================
# Code chunk 7
#=====================================================================================
datTraits<-SAMDF_RNA_Gill[,traitData]
datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
collectGarbage()
#=====================================================================================
# Code chunk 8
#=====================================================================================
# Re-cluster samples
sampleTree2 = hclust(dist(omics_data), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
recordPlot() -> SampleClusteringTreeTraitsPlot
#=====================================================================================
# Code chunk 9
#=====================================================================================
WGCNA_RNA_Gill[[Gills]]$omics_data <- omics_data
WGCNA_RNA_Gill[[Gills]]$datTraits <- datTraits
WGCNA_RNA_Gill[[Gills]]$SAMDF_RNA_Gill <- SAMDF_RNA_Gill
WGCNA_RNA_Gill[[Gills]]$SampleClusteringTreePlot <- SampleClusteringTreePlot
WGCNA_RNA_Gill[[Gills]]$SampleClusteringTreeTraitsPlot <- SampleClusteringTreeTraitsPlot
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 3333 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 29583"
## [1] "genes before -3333"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 3489 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 29427"
## [1] "genes before -3489"
SAMDF_RNA_Gill <-WGCNA_RNA_Gill$Samples_RNA_Gill$SAMDF_RNA_Gill
omics_data<- WGCNA_RNA_Gill$Samples_RNA_Gill$omics_data
datTraits<- WGCNA_RNA_Gill$Samples_RNA_Gill$datTraits
save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "dataInput", Date, sep="_"), ".RData", sep=""))))
##############
#Summary Plot#
##############
cowplot::plot_grid(WGCNA_RNA_Gill$Samples_RNA_Gill_All$SampleClusteringTreePlot, WGCNA_RNA_Gill$Samples_RNA_Gill$SampleClusteringTreePlot, labels = c("A", "B"), rel_heights = c(1,1), rel_widths = c(1,0.8), ncol = 2) -> part_1
part_1
cowplot::plot_grid(WGCNA_RNA_Gill$Samples_RNA_Gill_All$RNA_vst_PCA_Gill, WGCNA_RNA_Gill$Samples_RNA_Gill$RNA_vst_PCA_Gill, labels = c("C", "D"), rel_heights = c(1,1), rel_widths = c(1,1), ncol = 2) -> part_2
part_2
cowplot::plot_grid(part_1, part_2, ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_RNA_Gill, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 12)
part_3
Soft Threshold Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency, based on the criterion of approximate scale-free topology.
The general guidance for WGCNA and hdWGCNA is to pick the lowest soft power threshold that has a Scale Free Topology Model Fit greater than or equal to 0.8, so in this case we would select our soft power threshold as 9. Later on, the ConstructNetwork will automatically select the soft power threshold if the user does not provide one.
#=====================================================================================
# Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
# Code chunk 2
#=====================================================================================
#from Strand et al, 2021
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1520.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1520 of 29427
## ..working on genes 1521 through 3040 of 29427
## ..working on genes 3041 through 4560 of 29427
## ..working on genes 4561 through 6080 of 29427
## ..working on genes 6081 through 7600 of 29427
## ..working on genes 7601 through 9120 of 29427
## ..working on genes 9121 through 10640 of 29427
## ..working on genes 10641 through 12160 of 29427
## ..working on genes 12161 through 13680 of 29427
## ..working on genes 13681 through 15200 of 29427
## ..working on genes 15201 through 16720 of 29427
## ..working on genes 16721 through 18240 of 29427
## ..working on genes 18241 through 19760 of 29427
## ..working on genes 19761 through 21280 of 29427
## ..working on genes 21281 through 22800 of 29427
## ..working on genes 22801 through 24320 of 29427
## ..working on genes 24321 through 25840 of 29427
## ..working on genes 25841 through 27360 of 29427
## ..working on genes 27361 through 28880 of 29427
## ..working on genes 28881 through 29427 of 29427
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.324 -2.05 0.858 6480.00 6270.000 10100
## 2 2 0.687 -2.20 0.924 2210.00 2000.000 5040
## 3 3 0.788 -2.18 0.942 950.00 784.000 2990
## 4 4 0.815 -2.16 0.947 472.00 351.000 1940
## 5 5 0.843 -2.11 0.961 260.00 175.000 1340
## 6 6 0.853 -2.07 0.968 155.00 99.700 969
## 7 7 0.860 -2.02 0.977 98.60 56.900 720
## 8 8 0.859 -2.00 0.982 65.90 33.700 548
## 9 9 0.864 -1.96 0.988 46.10 20.900 425
## 10 10 0.858 -1.93 0.989 33.50 13.500 334
## 11 12 0.828 -1.84 0.974 19.60 6.120 215
## 12 14 0.783 -1.61 0.886 12.90 2.960 145
## 13 16 0.967 -1.16 0.962 9.36 1.520 103
## 14 18 0.942 -1.16 0.946 7.37 0.827 102
## 15 20 0.897 -1.12 0.931 6.17 0.465 102
# Plot the results:
# Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
# based on the criterion of approximate scale-free topology.
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
if(!is.infinite(idx)){
st <- sft$fitIndices[idx,1]
} else{
idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
st <- sft$fitIndices[idx,1]
}
} else{st <- sft$fitIndices[idx,1]}
# Plot Scale independence measure and Mean connectivity measure
# Scale-free topology fit index as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>%
ggplot() +
geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
ggtitle("Scale independence") +
xlab("Soft Threshold (power)") +
ylab("SF Model Fit,signed R^2") +
xlim(1,20) +
ylim(-1,1) +
geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.5)-> scale_independence_plot
# Mean connectivity as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
meanApprox = sft$fitIndices[,5]) %>%
ggplot() +
geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
xlab("Soft Threshold (power)") +
ylab("Mean Connectivity") +
geom_segment(aes(x = st-0.4,
y = sft$fitIndices$mean.k.[idx],
xend = 0,
yend = sft$fitIndices$mean.k.[idx]),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.4) +
ggtitle(paste0("Mean connectivity: ",
round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot
si_mc_plot <- cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"))
si_mc_plot
ggsave(si_mc_plot, filename = paste(paste(save_name_RNA_Gill, "soft_thresholding_power", sep="_"),".png", sep=""), path = pathPlots,
device='png', dpi=300, width = 8, height = 6)
softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 16"
softPower <- 6
deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.
##################
#blockwiseModules#
##################
softPower = softPower; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 6"
#https://www.biostars.org/p/305714/
#there is a problem with cor form stats and cor from WGCNA, restart R
network = blockwiseModules(omics_data,
power = softPower,
networkType = "signed",
TOMType = "signed",
corType = "bicor",
minModuleSize = 30, #for genes 30
reassignThreshold = 0,
deepSplit = 2,
mergeCutHeight = 0.25,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ....pre-clustering genes to determine blocks..
## Projective K-means:
## ..k-means clustering..
## ..merging smaller clusters...
## Block sizes:
## gBlocks
## 1 2 3 4 5 6 7
## 4996 4871 4861 4545 4073 3706 2375
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file SL_Gill_RNA_Summer_TOM-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 7 genes from module 1 because their KME is too low.
## ..removing 13 genes from module 2 because their KME is too low.
## ..removing 2 genes from module 3 because their KME is too low.
## ..removing 2 genes from module 4 because their KME is too low.
## ..Working on block 2 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 2 into file SL_Gill_RNA_Summer_TOM-block.2.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 131 genes from module 1 because their KME is too low.
## ..removing 20 genes from module 2 because their KME is too low.
## ..removing 45 genes from module 3 because their KME is too low.
## ..removing 22 genes from module 4 because their KME is too low.
## ..removing 28 genes from module 5 because their KME is too low.
## ..removing 17 genes from module 6 because their KME is too low.
## ..removing 16 genes from module 7 because their KME is too low.
## ..removing 11 genes from module 9 because their KME is too low.
## ..removing 159 genes from module 10 because their KME is too low.
## ..removing 1 genes from module 12 because their KME is too low.
## ..removing 1 genes from module 13 because their KME is too low.
## ..Working on block 3 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 3 into file SL_Gill_RNA_Summer_TOM-block.3.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 153 genes from module 1 because their KME is too low.
## ..removing 68 genes from module 2 because their KME is too low.
## ..removing 80 genes from module 3 because their KME is too low.
## ..removing 30 genes from module 4 because their KME is too low.
## ..removing 28 genes from module 6 because their KME is too low.
## ..removing 12 genes from module 7 because their KME is too low.
## ..Working on block 4 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 4 into file SL_Gill_RNA_Summer_TOM-block.4.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 339 genes from module 1 because their KME is too low.
## ..removing 99 genes from module 3 because their KME is too low.
## ..removing 72 genes from module 4 because their KME is too low.
## ..removing 117 genes from module 5 because their KME is too low.
## ..removing 273 genes from module 7 because their KME is too low.
## ..removing 286 genes from module 8 because their KME is too low.
## ..Working on block 5 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 5 into file SL_Gill_RNA_Summer_TOM-block.5.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 233 genes from module 1 because their KME is too low.
## ..removing 178 genes from module 2 because their KME is too low.
## ..removing 414 genes from module 3 because their KME is too low.
## ..removing 420 genes from module 4 because their KME is too low.
## ..removing 362 genes from module 5 because their KME is too low.
## ..removing 23 genes from module 6 because their KME is too low.
## ..removing 216 genes from module 8 because their KME is too low.
## ..removing 19 genes from module 9 because their KME is too low.
## ..Working on block 6 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 6 into file SL_Gill_RNA_Summer_TOM-block.6.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 442 genes from module 1 because their KME is too low.
## ..removing 119 genes from module 2 because their KME is too low.
## ..removing 79 genes from module 3 because their KME is too low.
## ..removing 59 genes from module 4 because their KME is too low.
## ..removing 327 genes from module 5 because their KME is too low.
## ..removing 348 genes from module 6 because their KME is too low.
## ..removing 12 genes from module 7 because their KME is too low.
## ..removing 1 genes from module 8 because their KME is too low.
## ..Working on block 7 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 7 into file SL_Gill_RNA_Summer_TOM-block.7.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 245 genes from module 4 because their KME is too low.
## ..removing 238 genes from module 5 because their KME is too low.
## ..removing 37 genes from module 6 because their KME is too low.
## ..removing 47 genes from module 7 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
# power = st,
# networkType = "signed",
# TOMType = "signed",
# corType = "bicor",
# maxPOutliers = 0.05,
# deepSplit = 4, # Default 2
# minModuleSize = 2, # 30
# minCoreKME = 0.5, # Default 0.5
# minCoreKMESize = 2, # Default minModuleSize/3,
# minKMEtoStay = 0.5, # Default 0.3
# reassignThreshold = 0, # Default 1e-6
# mergeCutHeight = 0.4, # Default 0.15
# pamStage = FALSE,
# pamRespectsDendro = TRUE,
# replaceMissingAdjacencies = TRUE,
# numericLabels = TRUE,
# saveTOMs = TRUE,
# saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
# verbose = 3)
###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs;
geneTree = network$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))
RNA_Gill_WGCNA <- list("RNA_Gill_omics_data" = omics_data,
"RNA_Gill_datTraits"=datTraits,
"RNA_Gill_MEs"=MEs,
"RNA_Gill_moduleLabels"=moduleLabels,
"RNA_Gill_moduleColors"=moduleColors,
"RNA_Gill_geneTree"=geneTree)
saveRDS(RNA_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List", Date, sep="_"), ".rds", sep=""))))
#####################
#plotDendroAndColors#
#####################
##############################
#Number of ASV in each module#
##############################
as.data.frame(table(RNA_Gill_WGCNA$RNA_Gill_moduleLabels)) %>%
dplyr::rename(Module = Var1, Size = Freq) %>%
dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>%
ggplot(aes(x = Module, y = Size, fill = Module)) +
geom_col(color = "#000000") +
ggtitle("Number of genes in each module") +
theme(legend.position = "none") +
scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
ylim(0, max(module_size$Size)*1.1) +
theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
coord_flip() -> RNAModuleSizePlot
RNAModuleSizePlot
ggsave(RNAModuleSizePlot, filename = paste(paste(save_name_RNA_Gill, "RNAModulePlot", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]
MEs_R_noME0[upper.tri(MEs_R_noME0)] %>%
as.data.frame() %>%
dplyr::rename("correlation" = ".") %>%
ggplot(aes(x=correlation)) +
geom_histogram(bins = 20) +
#geom_density() +
xlim(-1, 1) +
ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
MEs_R_density
pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
silent = T,
breaks = seq(-1,1,length.out = 101),
treeheight_row = 5,
treeheight_col = 5,
main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
labels_row = paste0(prefix, rownames(MEs_R)),
labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr
MEs_R_Corr
ggsave(MEs_R_Corr, filename = paste(paste(save_name_RNA_Gill, "MEs_R_Corr", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen
#######################
#Network-Visualization#
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#############################
# Correlation within modules# from Strand et al, 2021
#############################
corr_within_module <- function(omics_data, network, module_x = 1){
idx.omics_data <- which(network$colors == module_x)
idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
h <- as.numeric(sub("ME","", m))
data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>%
ggplot() +
geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) +
xlim(-1, 1) +
xlab("ASV correlation")+
ggtitle(paste0(prefix,m)) -> da_plot
ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]
cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot
ggsave(density_all_plot, filename = paste(paste(save_name_RNA_Gill, "WithinModuleCorrelation", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 20)
##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, RNAModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,1)) -> part_2
ggsave(part_2, filename = paste(paste(save_name_RNA_Gill, "RNA_Gill_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,
height = 8)
part_2
###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(RNA_Gill_WGCNA$RNA_Gill_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .) %>% paste("RNA",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs)
mME = mat %>%
pivot_longer(-traits) %>%
mutate(#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
pivot_longer(-traits) %>%
mutate(
#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)
textMatrixLong2 <- as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
pivot_longer(-traits) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)
## add gene counts per module
genesmod<- as.data.frame(moduleLabels)
genesmod$genes <- rownames(genesmod)
genesmod$Modules <- paste("RNA",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
ModCount <- as.data.frame(genesmod %>%
dplyr::group_by(Modules) %>%
dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]
HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
geom_tile() +
scale_fill_gradient2(
low = "steelblue1",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name_RNA_Gill, "WGCNA-ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",
"Salinity",
"SecchiDepth"
)) ~ "Abiotics",
(traits %in% c(
"FultonK",
"Length",
"Weight",
"StomachContent",
"HSI",
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
geom_tile() +
scale_fill_gradientn(
colours = c("steelblue1", "white", "red"),
limit = c(-1,1),
values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name_RNA_Gill, "WGCNA-ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 13, height = 7)
##################
#Specific Modules#
##################
# Module <- "steelblue"
# HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
# geom_tile() +
# scale_fill_gradientn(
# colours = c("steelblue1", "white", "red"),
# limit = c(-1,1),
# values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
# # scale_fill_gradient2(
# # low = "steelblue1",
# # high = "red",
# # mid = "white",
# # midpoint = 0,
# # limit = c(-1,1))
# #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
# theme(axis.text.x = element_text(angle=90)) +
# labs(title = "", y = "Modules", fill="corr") +
# geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2),
# size=3, colour="grey20") +
# geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2),
# colour="grey20", size=3) +
# theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
# theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
# prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
# A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
# plot(A)
# ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots,
# device='png', dpi=300, width = 2.8,height = 7)
RNA_Gill_WGCNA<- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List", Date, sep="_"), ".rds", sep=""))))
InterestingComparison <- RNA_Gill_WGCNA$RNA_Gill_MEs
Module_Genes_list <- list()
for (MODULE in names(InterestingComparison)) {
a <- length(Module_Genes_list)
ExpressionSet <- RNA_Gill_WGCNA$RNA_Gill_omics_data
moduleLabels <- RNA_Gill_WGCNA$RNA_Gill_moduleLabels
Annotations <- SLUCGeneManual
genes_of_interest <- as.data.frame(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))]))
#print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))
colnames(genes_of_interest) <- "rowname"
print(paste("Module genes before Conversion to EntrezIDs"))
print(paste(MODULE, dim(genes_of_interest)[1]))
########################################
#Add annotated Gene-Names to omics data#
########################################
#print(paste("Lost by double dot to dot conversion somwhere in the pipeline"))
#print(genes_of_interest$rowname[genes_of_interest$rowname %in% SLUCGeneManual$rowname == FALSE])
AnnotationDbi_genes <-dplyr::left_join(genes_of_interest, SLUCGeneManual)
AnnotationDbi_genes$GeneSymbolHS <- AnnotationDbi_genes$Human_SYMBOL_Manual
AnnotationDbi_genes2 <- as.data.frame(AnnotationDbi_genes$Human_SYMBOL_Manual)
colnames(AnnotationDbi_genes2) <- "GeneSymbolHS"
library(org.Hs.eg.db)
HS<-AnnotationDbi::select(org.Hs.eg.db, AnnotationDbi_genes2$GeneSymbolHS, "ENTREZID","SYMBOL") #Get EntrezIDs from Symbol
#CHECK!#
subset(HS, is.na(HS$ENTREZID))
dim(subset(HS, is.na(HS$ENTREZID)))
AnnotationDbi_genes2 <- dplyr::left_join(AnnotationDbi_genes2, HS, by=c("GeneSymbolHS" = "SYMBOL"))
dedup_ids = AnnotationDbi_genes2[!duplicated(AnnotationDbi_genes2[c("ENTREZID")]),]
dedup_ids <- dedup_ids[!is.na(dedup_ids$ENTREZID),]
print(paste("Module genes After Conversion to EntrezIDs"))
print(paste(MODULE, dim(dedup_ids)[1]))
#print(head(dedup_ids))
A<- dplyr::left_join(AnnotationDbi_genes[c("Geneid", "GeneSymbolHS")], dedup_ids)
A <- A[complete.cases(A$Geneid), ]
Module_Genes_list[[a+1]] <- A
names(Module_Genes_list)[[a+1]] <- paste("GeneAnno",MODULE, sep="_")
}
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME2 2960"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME2 2184"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME11 639"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME11 459"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME7 764"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME7 548"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME10 728"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME10 605"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME20 246"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME20 191"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME25 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME25 103"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME31 76"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME31 52"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME16 388"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME16 330"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME38 38"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME38 30"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME17 297"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME17 262"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME5 888"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME5 774"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME4 936"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME4 845"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME15 413"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME15 360"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME21 223"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME21 180"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME26 104"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME26 83"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME37 41"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME37 38"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME36 48"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME36 29"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME9 743"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME9 547"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME14 429"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME14 324"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME27 95"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME27 64"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME12 541"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME12 435"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME13 499"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME13 416"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME22 215"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME22 183"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME28 94"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME28 50"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME32 72"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME32 49"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME29 87"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME29 51"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME34 68"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME34 50"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME30 84"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME30 60"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME35 55"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME35 31"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME3 1675"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME3 1352"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME23 174"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME23 137"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME1 3848"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME1 2814"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME6 859"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME6 733"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME8 749"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME8 546"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME24 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME24 86"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME18 296"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME18 216"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME19 251"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME19 196"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME33 70"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME33 51"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME0 9484"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME0 3864"
Gene_counts <- list()
for (i in names(Module_Genes_list)) {
a <- length(Gene_counts)
g <- sub("GeneAnno_ME", "", i)
A <- length(unique(na.omit(Module_Genes_list[[i]]$ENTREZID)))
B <- length(na.omit(Module_Genes_list[[i]]$Geneid))
C <- table(RNA_Gill_WGCNA$RNA_Gill_moduleLabels == g)[2]
module_summary <- data.frame(Module = i, Module_Size = C, EnrezID = A)
Gene_counts[[a+1]] <- module_summary}
print(do.call(rbind, Gene_counts))
## Module Module_Size EnrezID
## TRUE GeneAnno_ME2 2960 2184
## TRUE1 GeneAnno_ME11 639 459
## TRUE2 GeneAnno_ME7 764 548
## TRUE3 GeneAnno_ME10 728 605
## TRUE4 GeneAnno_ME20 246 191
## TRUE5 GeneAnno_ME25 125 103
## TRUE6 GeneAnno_ME31 76 52
## TRUE7 GeneAnno_ME16 388 330
## TRUE8 GeneAnno_ME38 38 30
## TRUE9 GeneAnno_ME17 297 262
## TRUE10 GeneAnno_ME5 888 774
## TRUE11 GeneAnno_ME4 936 845
## TRUE12 GeneAnno_ME15 413 360
## TRUE13 GeneAnno_ME21 223 180
## TRUE14 GeneAnno_ME26 104 83
## TRUE15 GeneAnno_ME37 41 38
## TRUE16 GeneAnno_ME36 48 29
## TRUE17 GeneAnno_ME9 743 547
## TRUE18 GeneAnno_ME14 429 324
## TRUE19 GeneAnno_ME27 95 64
## TRUE20 GeneAnno_ME12 541 435
## TRUE21 GeneAnno_ME13 499 416
## TRUE22 GeneAnno_ME22 215 183
## TRUE23 GeneAnno_ME28 94 50
## TRUE24 GeneAnno_ME32 72 49
## TRUE25 GeneAnno_ME29 87 51
## TRUE26 GeneAnno_ME34 68 50
## TRUE27 GeneAnno_ME30 84 60
## TRUE28 GeneAnno_ME35 55 31
## TRUE29 GeneAnno_ME3 1675 1352
## TRUE30 GeneAnno_ME23 174 137
## TRUE31 GeneAnno_ME1 3848 2814
## TRUE32 GeneAnno_ME6 859 733
## TRUE33 GeneAnno_ME8 749 546
## TRUE34 GeneAnno_ME24 125 86
## TRUE35 GeneAnno_ME18 296 216
## TRUE36 GeneAnno_ME19 251 196
## TRUE37 GeneAnno_ME33 70 51
## TRUE38 GeneAnno_ME0 9484 3864
RNA_Gill_WGCNA$RNA_Gill_GeneAnno <- Module_Genes_list
saveRDS(RNA_Gill_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Gill, "List_3", Date, sep="_"), ".rds", sep=""))))
Individual Co-Expression networks
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
#Read in the female liver data set
set.seed(123)
Species <- "SL"
Year <- "2021"
Season <- "Summer"
Type <- "RNA"
Tissue <- "Liver"
Analysis <- "WGCNA"
alpha <- 0.05
OperatingSystem <- "Windows"
prefix <- "RNAseq-"
#####################
Tissue <- "Liver"
variable <- "Replicates"
#####################
save_name_RNA_Liver <- paste(Species, Year,Season, Tissue, Type, Analysis, sep = "_")
paste0(file.path(path_Output_WGCNA, paste(Analysis, "_", sep="")), save_name_RNA_Liver, ".RData")
## [1] "/Users/admin/Desktop/ElbeEstuarineZander/SL_Output_WGCNA/WGCNA_SL_2021_Summer_Liver_RNA_WGCNA.RData"
##################
#Sample selection#
##################
cat(paste(shQuote(SAMDF_RNA_Liver$SampleID, type="cmd"), collapse=", "))
## "SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7", "SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB7", "SLSU21BBEB9", "SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9", "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9"
# "SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Liver_All<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB3", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB6", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
Samples_RNA_Liver<-c(
"SLSU21MGEB1", "SLSU21MGEB2", "SLSU21MGEB4", "SLSU21MGEB5", "SLSU21MGEB7",
"SLSU21BBEB1", "SLSU21BBEB2", "SLSU21BBEB4", "SLSU21BBEB6", "SLSU21BBEB9", "SLSU21BBEB7",
"SLSU21SSEB2", "SLSU21SSEB3", "SLSU21SSEB5", "SLSU21SSEB7", "SLSU21SSEB9",
"SLSU21MLEB1", "SLSU21MLEB2", "SLSU21MLEB5", "SLSU21MLEB6", "SLSU21MLEB7", "SLSU21MLEB9")
#"SLSU21SSEB6", doubble outlier from the group in Liver & Gill
#"SLSU21MGEB3", doubble outlier from the group in Liver & Gill
Samples_RNA_Liver_List <- list("Samples_RNA_Liver_All" = Samples_RNA_Liver_All, "Samples_RNA_Liver"=Samples_RNA_Liver)
WGCNA_RNA_Liver <- list()
for (Livers in names(Samples_RNA_Liver_List)){
WGCNA_RNA_Liver[[Livers]] <- list()
Samples <- Samples_RNA_Liver_List[[Livers]]
SAMDF_RNA_Liver <- SAMDF_RNA_Liver[SAMDF_RNA_Liver$SampleID %in% Samples,]
SAMDF_RNA_Liver <-SAMDF_RNA_Liver %>% arrange(factor(rownames(SAMDF_RNA_Liver), levels = Samples))
rownames(SAMDF_RNA_Liver) <-SAMDF_RNA_Liver$SampleID
#=====================================================================================
# ExperimentalDesign & Filter
#=====================================================================================
cnt_RNA_Liver <- cnt_RNA_Liver[,SAMDF_RNA_Liver$SampleID]
#ExperimentalDesign
library(DESeq2)
dds_RNA_Liver <- DESeqDataSetFromMatrix(countData = cnt_RNA_Liver,
colData = SAMDF_RNA_Liver,
design = ~ Replicates)
vst_RNA_Liver <- vst(dds_RNA_Liver, blind = FALSE)
##################
# SampleDist PCAs#
##################
vst_RNA_list <- list("vst_RNA_Liver" =vst_RNA_Liver)
for (i in names(vst_RNA_list)[grepl("vst", names(vst_RNA_list))]){
require(plyr)
require(ggrepel)
require(cowplot)
if (OperatingSystem == "Mac" ) {
quartz() }
tryCatch({
g <- paste(i)
gg <- gsub(paste0(".*", "(Gill|Liver)", ".*"), "\\1", i)
RNA_vst_PCA_Liver <-pcaplotRK3(vst_RNA_list[[i]],intgroup = c("Replicates"), pcX = 1, pcY = 2, title="",
ellipse = TRUE, ellipse.prob = 0.5) +
scale_fill_manual(values=col.Palette$SL) + #col.Palette.SeqCenter #col.Palette.Cruises
scale_color_manual(values=col.Palette$SL) + atheme +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme(
panel.grid.major = element_line(colour = "grey50"),
panel.grid.minor = element_line(colour = "grey50"))
prow <- cowplot::plot_grid(RNA_vst_PCA_Liver, labels = c(""), ncol = 1)
title <- ggdraw() + draw_label_themeRKwhite(paste(Species, gg, Type, Livers), element = "plot.title",x = 0.05, hjust = 0, vjust = 1)
subtitle <- ggdraw() + draw_label_themeRKwhite(paste("vst-PCA", "with", ... =
length(rownames(vst_RNA_list[[i]])),"genes",sep = " "), element = "plot.subtitle",x = 0.05, hjust = 0,
vjust = 1)
RNA_vst_PCA_Liver<- plot_grid(title, subtitle, prow, ncol = 1, rel_heights = c(0, 0.05, 0.989))
ggsave(RNA_vst_PCA_Liver, filename = paste(paste(save_name_RNA_Liver, "vst-PCA", Livers, sep="_"),".png", sep=""), path = pathPlots ,
device='png', dpi=300, width = 7,
height = 7)
plot(RNA_vst_PCA_Liver)
WGCNA_RNA_Liver[[Livers]]$RNA_vst_PCA_Liver <- RNA_vst_PCA_Liver
}, error=function(e){cat("ERROR :",conditionMessage(e), "\n")})
}
#######################
### 3.1.2 Clustering###
#######################
########################################
# Get overview of normalized data#
# dds <- DESeq(dds)
# VST <- getVarianceStabilizedData(dds)
# rv_VST <- rowVars(VST)
# summary(rv_VST)
# q75_VST <- quantile( rowVars(VST), .75) # <= original
# expr_normalized <- VST[ rv_VST > q75_VST, ]
#
# expr_normalized_df <- data.frame(expr_normalized) %>%
# mutate(Gene_id = row.names(expr_normalized)) %>% pivot_longer(-Gene_id)
#
# expr_normalized_df %>% ggplot(., aes(x = name, y = value)) +
# geom_violin() +
# geom_point() +
# theme_bw() +
# theme(axis.text.x = element_text( angle = 90)) +
# ylim(0, NA) +
# labs(
# title = "Normalized and 75 quantile Expression",
# x = "treatment",y = "normalized expression")
####################################################
#=====================================================================================
# Code chunk 3
#=====================================================================================
library(WGCNA)
#rlog-Transformation
omics_data0 <-data.frame(t(assay(vst_RNA_Liver)))
a <- length(omics_data0)
gsg = goodSamplesGenes(omics_data0, verbose = 3);
gsg$allOK
#=====================================================================================
# Code chunk 4
#=====================================================================================
if (!gsg$allOK) {
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
print(paste("Removing genes"));
if (sum(!gsg$goodSamples)>0)
print(paste("Removing samples:"));
# Remove the offending genes and samples from the data:
omics_data0 = omics_data0[gsg$goodSamples, gsg$goodGenes]
}
b <- length(omics_data0)
print(paste("genes before", a))
print(paste("genes before", b))
print(paste("genes before", b-a))
#=====================================================================================
# Code chunk 5
#=====================================================================================
sampleTree = hclust(dist(omics_data0), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
sizeGrWindow(12,9)
#pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
par(cex = 0.6);
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
cex.axis = 1.5, cex.main = 2)
#=====================================================================================
# Code chunk 6
#=====================================================================================
# Plot a line to show the cut
abline(h = 100, col = "red");
# Determine cluster under the line
clust = cutreeStatic(sampleTree, cutHeight = 100, minSize = 10)
table(clust)
# clust 1 contains the samples we want to keep.
recordPlot() -> SampleClusteringTreePlot
#in Case we would exclude outliers from the tree:
#keepSamples = (clust==1)
#omics_data0 = omics_data0[keepSamples, ]
#Keep all samples
omics_data = omics_data0
nGenes = ncol(omics_data)
nSamples = nrow(omics_data)
#=====================================================================================
# Code chunk 7
#=====================================================================================
datTraits<-SAMDF_RNA_Liver[,traitData]
datTraits <- datTraits[rownames(datTraits) %in% rownames(omics_data),]
collectGarbage()
#=====================================================================================
# Code chunk 8
#=====================================================================================
# Re-cluster samples
sampleTree2 = hclust(dist(omics_data), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = numbers2colors(datTraits, signed = FALSE)
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")
recordPlot() -> SampleClusteringTreeTraitsPlot
#=====================================================================================
# Code chunk 9
#=====================================================================================
WGCNA_RNA_Liver[[Livers]]$omics_data <- omics_data
WGCNA_RNA_Liver[[Livers]]$datTraits <- datTraits
WGCNA_RNA_Gill[[Livers]]$SAMDF_RNA_Liver <- SAMDF_RNA_Liver
WGCNA_RNA_Liver[[Livers]]$SampleClusteringTreePlot <- SampleClusteringTreePlot
WGCNA_RNA_Liver[[Livers]]$SampleClusteringTreeTraitsPlot <- SampleClusteringTreeTraitsPlot
}
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 4702 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 28214"
## [1] "genes before -4702"
## [1] "Adapted from Marini F, Binder H (2019). “pcaExplorer: an R/Bioconductor package for interacting with RNA-seq principal components.” BMC Bioinformatics, 20(1), 331. doi:10.1186/s12859-019-2879-1, https://bioconductor.org/packages/pcaExplorer/."
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Excluding 4850 genes from the calculation due to too many missing samples or zero variance.
## ..step 2
## [1] "Removing genes"
## [1] "genes before 32916"
## [1] "genes before 28066"
## [1] "genes before -4850"
SAMDF_RNA_Liver <-WGCNA_RNA_Liver$Samples_RNA_Liver$SAMDF_RNA_Liver
omics_data<- WGCNA_RNA_Liver$Samples_RNA_Liver$omics_data
datTraits<- WGCNA_RNA_Liver$Samples_RNA_Liver$datTraits
save(omics_data, datTraits, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "dataInput", Date, sep="_"), ".RData", sep=""))))
##############
#Summary Plot#
##############
cowplot::plot_grid(WGCNA_RNA_Liver$Samples_RNA_Liver_All$SampleClusteringTreePlot, WGCNA_RNA_Liver$Samples_RNA_Liver$SampleClusteringTreePlot, labels = c("A", "B"), rel_heights = c(1,1), rel_widths = c(1,0.8), ncol = 2) -> part_1
part_1
cowplot::plot_grid(WGCNA_RNA_Liver$Samples_RNA_Liver_All$RNA_vst_PCA_Liver, WGCNA_RNA_Liver$Samples_RNA_Liver$RNA_vst_PCA_Liver, labels = c("C", "D"), rel_heights = c(1,1), rel_widths = c(1,1), ncol = 2) -> part_2
part_2
cowplot::plot_grid(part_1, part_2, ncol = 1) -> part_3
ggsave(part_3, filename = paste(paste(save_name_RNA_Liver, "SSU_DataInputPlot", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 12, height = 12)
part_3
Soft Threshold Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency, based on the criterion of approximate scale-free topology.
The general guidance for WGCNA and hdWGCNA is to pick the lowest soft power threshold that has a Scale Free Topology Model Fit greater than or equal to 0.8, so in this case we would select our soft power threshold as 9. Later on, the ConstructNetwork will automatically select the soft power threshold if the user does not provide one.
#=====================================================================================
# Code chunk 1
#=====================================================================================
# Load the WGCNA package
library(WGCNA)
# The following setting is important, do not omit.
options(stringsAsFactors = FALSE);
# Allow multi-threading within WGCNA. At present this call is necessary.
# Any error here may be ignored but you may want to update WGCNA if you see one.
# Caution: skip this line if you run RStudio or other third-party R environments.
# See note above.
#enableWGCNAThreads()
# Load the data saved in the first part
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "dataInput", Date, sep="_"), ".RData", sep=""))))
#The variable lnames contains the names of loaded variables.
lnames
## [1] "omics_data" "datTraits"
#=====================================================================================
# Code chunk 2
#=====================================================================================
#from Strand et al, 2021
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=20, by=2))
# Call the network topology analysis function
allowWGCNAThreads() #needed otherwise would not work! https://www.biostars.org/p/122349/
## Allowing multi-threading with up to 8 threads.
sft = pickSoftThreshold(omics_data, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 1594.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 1594 of 28066
## ..working on genes 1595 through 3188 of 28066
## ..working on genes 3189 through 4782 of 28066
## ..working on genes 4783 through 6376 of 28066
## ..working on genes 6377 through 7970 of 28066
## ..working on genes 7971 through 9564 of 28066
## ..working on genes 9565 through 11158 of 28066
## ..working on genes 11159 through 12752 of 28066
## ..working on genes 12753 through 14346 of 28066
## ..working on genes 14347 through 15940 of 28066
## ..working on genes 15941 through 17534 of 28066
## ..working on genes 17535 through 19128 of 28066
## ..working on genes 19129 through 20722 of 28066
## ..working on genes 20723 through 22316 of 28066
## ..working on genes 22317 through 23910 of 28066
## ..working on genes 23911 through 25504 of 28066
## ..working on genes 25505 through 27098 of 28066
## ..working on genes 27099 through 28066 of 28066
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.282 -1.89 0.787 5790.00 5550.000 8790
## 2 2 0.792 -2.20 0.927 1890.00 1670.000 4240
## 3 3 0.889 -2.15 0.950 789.00 625.000 2520
## 4 4 0.882 -2.11 0.936 389.00 280.000 1700
## 5 5 0.866 -2.08 0.926 216.00 143.000 1230
## 6 6 0.858 -2.06 0.928 132.00 77.500 931
## 7 7 0.840 -2.04 0.927 86.40 44.200 730
## 8 8 0.813 -2.05 0.928 60.20 26.900 587
## 9 9 0.804 -2.01 0.935 44.10 16.900 481
## 10 10 0.786 -1.98 0.935 33.70 11.000 401
## 11 12 0.749 -1.83 0.889 21.90 4.950 288
## 12 14 0.774 -1.51 0.770 15.90 2.440 215
## 13 16 0.304 -2.43 0.237 12.60 1.280 207
## 14 18 0.311 -3.06 0.255 10.70 0.700 206
## 15 20 0.307 -3.33 0.223 9.42 0.398 206
# Plot the results:
# Find the soft thresholding power beta to which co-expression similarity is raised to calculate adjacency.
# based on the criterion of approximate scale-free topology.
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.90))
if(is.infinite(idx)){
idx <- min(which((-sign(sft$fitIndices[,3])*sft$fitIndices[,2]) > 0.80))
if(!is.infinite(idx)){
st <- sft$fitIndices[idx,1]
} else{
idx <- which.max(-sign(sft$fitIndices[,3])*sft$fitIndices[,2])
st <- sft$fitIndices[idx,1]
}
} else{st <- sft$fitIndices[idx,1]}
# Plot Scale independence measure and Mean connectivity measure
# Scale-free topology fit index as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
sfApprox = -sign(sft$fitIndices[,3])*sft$fitIndices[,2]) %>%
ggplot() +
geom_hline(yintercept = 0.9, color = "red", alpha = 0.6) + # corresponds to R^2 cut-off of 0.9
geom_hline(yintercept = 0.8, color = "red", alpha = 0.2) + # corresponds to R^2 cut-off of 0.8
geom_line(aes(x = Indices, y = sfApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = sfApprox, label = Indices), color = "red", size = 4) +
ggtitle("Scale independence") +
xlab("Soft Threshold (power)") +
ylab("SF Model Fit,signed R^2") +
xlim(1,20) +
ylim(-1,1) +
geom_segment(aes(x = st, y = 0.25, xend = st, yend = sfApprox[idx]-0.05),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.5)-> scale_independence_plot
# Mean connectivity as a function of the soft-thresholding power
data.frame(Indices = sft$fitIndices[,1],
meanApprox = sft$fitIndices[,5]) %>%
ggplot() +
geom_line(aes(x = Indices, y = meanApprox), color = "red", alpha = 0.1, size = 2.5) +
geom_text(mapping = aes(x = Indices, y = meanApprox, label = Indices), color = "red", size = 4) +
xlab("Soft Threshold (power)") +
ylab("Mean Connectivity") +
geom_segment(aes(x = st-0.4,
y = sft$fitIndices$mean.k.[idx],
xend = 0,
yend = sft$fitIndices$mean.k.[idx]),
arrow = arrow(length = unit(0.2,"cm")),
size = 0.4) +
ggtitle(paste0("Mean connectivity: ",
round(sft$fitIndices$mean.k.[idx],2))) -> mean_connectivity_plot
si_mc_plot <- cowplot::plot_grid(scale_independence_plot, mean_connectivity_plot, ncol = 2, align = "h", labels = c("A", "B"))
si_mc_plot
ggsave(si_mc_plot, filename = paste(paste(save_name_RNA_Liver, "soft_thresholding_power", sep="_"),".png", sep=""), path = pathPlots,
device='png', dpi=300, width = 8, height = 6)
softPower = st; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 3"
softPower <- 3
deepSplit integer value between 0 and 4. Provides a simplified control over how sensitive module detection should be to module splitting, with 0 least and 4 most sensitive. See cutreeDynamic for more details.
##################
#blockwiseModules#
##################
softPower = softPower; print(paste("SoftPower chosen to", softPower))
## [1] "SoftPower chosen to 3"
#https://www.biostars.org/p/305714/
#there is a problem with cor form stats and cor from WGCNA, restart R
network = blockwiseModules(omics_data,
power = softPower,
networkType = "signed",
TOMType = "signed",
corType = "bicor",
minModuleSize = 30, #for genes 30
reassignThreshold = 0,
deepSplit = 2,
mergeCutHeight = 0.25,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ....pre-clustering genes to determine blocks..
## Projective K-means:
## ..k-means clustering..
## ..merging smaller clusters...
## Block sizes:
## gBlocks
## 1 2 3 4 5 6
## 4988 4976 4645 4541 4521 4395
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file SL_Liver_RNA_Summer_TOM-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 804 genes from module 1 because their KME is too low.
## ..removing 112 genes from module 2 because their KME is too low.
## ..removing 24 genes from module 3 because their KME is too low.
## ..Working on block 2 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 2 into file SL_Liver_RNA_Summer_TOM-block.2.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 144 genes from module 1 because their KME is too low.
## ..removing 102 genes from module 2 because their KME is too low.
## ..Working on block 3 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 3 into file SL_Liver_RNA_Summer_TOM-block.3.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 88 genes from module 1 because their KME is too low.
## ..removing 595 genes from module 2 because their KME is too low.
## ..removing 407 genes from module 3 because their KME is too low.
## ..removing 36 genes from module 4 because their KME is too low.
## ..removing 245 genes from module 5 because their KME is too low.
## ..removing 5 genes from module 9 because their KME is too low.
## ..removing 1 genes from module 11 because their KME is too low.
## ..removing 68 genes from module 12 because their KME is too low.
## ..removing 24 genes from module 13 because their KME is too low.
## ..Working on block 4 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 4 into file SL_Liver_RNA_Summer_TOM-block.4.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 78 genes from module 1 because their KME is too low.
## ..removing 621 genes from module 2 because their KME is too low.
## ..removing 502 genes from module 3 because their KME is too low.
## ..removing 432 genes from module 4 because their KME is too low.
## ..removing 323 genes from module 5 because their KME is too low.
## ..removing 228 genes from module 6 because their KME is too low.
## ..removing 264 genes from module 7 because their KME is too low.
## ..removing 3 genes from module 8 because their KME is too low.
## ..removing 1 genes from module 9 because their KME is too low.
## ..removing 1 genes from module 11 because their KME is too low.
## ..Working on block 5 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 5 into file SL_Liver_RNA_Summer_TOM-block.5.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 580 genes from module 1 because their KME is too low.
## ..removing 468 genes from module 2 because their KME is too low.
## ..removing 437 genes from module 3 because their KME is too low.
## ..removing 70 genes from module 4 because their KME is too low.
## ..removing 372 genes from module 5 because their KME is too low.
## ..removing 300 genes from module 6 because their KME is too low.
## ..removing 245 genes from module 9 because their KME is too low.
## ..removing 18 genes from module 10 because their KME is too low.
## ..removing 2 genes from module 11 because their KME is too low.
## ..Working on block 6 .
## TOM calculation: adjacency..
## ..will use 8 parallel threads.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 6 into file SL_Liver_RNA_Summer_TOM-block.6.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..removing 53 genes from module 1 because their KME is too low.
## ..removing 57 genes from module 2 because their KME is too low.
## ..removing 10 genes from module 3 because their KME is too low.
## ..removing 372 genes from module 4 because their KME is too low.
## ..removing 41 genes from module 8 because their KME is too low.
## ..removing 88 genes from module 9 because their KME is too low.
## ..removing 2 genes from module 10 because their KME is too low.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
#Strand et., 2021
# network <- blockwiseModules(omics_data,
# power = st,
# networkType = "signed",
# TOMType = "signed",
# corType = "bicor",
# maxPOutliers = 0.05,
# deepSplit = 4, # Default 2
# minModuleSize = 2, # 30
# minCoreKME = 0.5, # Default 0.5
# minCoreKMESize = 2, # Default minModuleSize/3,
# minKMEtoStay = 0.5, # Default 0.3
# reassignThreshold = 0, # Default 1e-6
# mergeCutHeight = 0.4, # Default 0.15
# pamStage = FALSE,
# pamRespectsDendro = TRUE,
# replaceMissingAdjacencies = TRUE,
# numericLabels = TRUE,
# saveTOMs = TRUE,
# saveTOMFileBase = paste(Species,Tissue,Type,Season, "TOM", sep="_"),
# verbose = 3)
###########
#Save Data#
moduleLabels = network$colors
moduleColors = labels2colors(network$colors)
MEs = network$MEs;
geneTree = network$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree,
file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))
lnames = load(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "networkConstruction-auto", Date, sep="_"), ".rds", sep=""))))
RNA_Liver_WGCNA <- list("RNA_Liver_omics_data" = omics_data,
"RNA_Liver_datTraits"=datTraits,
"RNA_Liver_MEs"=MEs,
"RNA_Liver_moduleLabels"=moduleLabels,
"RNA_Liver_moduleColors"=moduleColors,
"RNA_Liver_geneTree"=geneTree)
saveRDS(RNA_Liver_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))
RNA_Liver_WGCNA <- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))
##############################
#Number of ASV in each module#
##############################
as.data.frame(table(RNA_Liver_WGCNA$RNA_Liver_moduleLabels)) %>%
dplyr::rename(Module = Var1, Size = Freq) %>%
dplyr::mutate(Module_color = labels2colors(as.numeric(as.character(Module)))) -> module_size
module_size %>%
ggplot(aes(x = Module, y = Size, fill = Module)) +
geom_col(color = "#000000") +
ggtitle("Number of genes in each module") +
theme(legend.position = "none") +
scale_fill_manual(values = setNames(module_size$Module_color,module_size$Module)) +
geom_text(aes(label = Size),vjust = 0.5, hjust = -0.18, size = 3.5) +
ylim(0, max(module_size$Size)*1.1) +
theme(plot.margin = margin(2, 2, 2, 2, "pt")) +
coord_flip() -> RNAModuleSizePlot
RNAModuleSizePlot
ggsave(RNAModuleSizePlot, filename = paste(paste(save_name_RNA_Liver, "RNAModulePlot", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
#######################################
#Module-Eigenegene-Correlation-Heatmap#
#######################################
MEs_R <- bicor(MEs, MEs, maxPOutliers = 0.05)
idx.r <- which(rownames(MEs_R) == "ME0")
idx.c <- which(colnames(MEs_R) == "ME0")
MEs_R_noME0 <- MEs_R[-idx.r, -idx.c]
MEs_R_noME0[upper.tri(MEs_R_noME0)] %>%
as.data.frame() %>%
dplyr::rename("correlation" = ".") %>%
ggplot(aes(x=correlation)) +
geom_histogram(bins = 20) +
#geom_density() +
xlim(-1, 1) +
ggtitle(paste0(prefix,"ME correlation\n w/o ",prefix ,"ME0")) -> MEs_R_density
MEs_R_density
pheatmap::pheatmap(MEs_R_noME0, color = colorRampPalette(c("Blue", "White", "Red"))(100),
silent = T,
breaks = seq(-1,1,length.out = 101),
treeheight_row = 5,
treeheight_col = 5,
main = paste0(prefix,"ME correlation heatmap w/o ",prefix ,"ME0"),
labels_row = paste0(prefix, rownames(MEs_R)),
labels_col = paste0(prefix, colnames(MEs_R))) -> MEs_R_Corr
ggsave(MEs_R_Corr, filename = paste(paste(save_name_RNA_Liver, "MEs_R_Corr", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
cowplot::plot_grid(MEs_R_density, MEs_R_Corr$gtable, labels = c("D", "E"), rel_widths = c(0.6, 1)) -> density_eigen
#######################
#Network-Visualization# Takes to long for knitting
#######################
# 5 Visualization of networks within R
# 5.a Visualizing the gene network
# One way to visualize a weighted network is to plot its heatmap, Fig. 1. Each row and column of the heatmap
# correspond to a single gene. The heatmap can depict adjacencies or topological overlaps, with light colors denoting
# low adjacency (overlap) and darker colors higher adjacency (overlap). In addition, the gene dendrograms and module
# colors are plotted along the top and left side of the heatmap. The package provides a convenient function to create
# such network plots; Fig. 1 was created using the following code. This code can be executed only if the network
# was calculated using a single-block approach (that is, using the 1-step automatic or the step-by-step tutorials). If
# the networks were calculated using the block-wise approach, the user will need to modify this code to perform the
# visualization in each block separately. The modi cation is simple and we leave it as an exercise for the interested
# reader.
# Calculate topological overlap anew: this could be done more efficiently by saving the TOM
# calculated during module detection, but let us do it again here.
# dissTOM = 1-TOMsimilarityFromExpr(omics_data, power = softPower);
# # Transform dissTOM with a power to make moderately strong connections more visible in the heatmap
# plotTOM = dissTOM^10;
# # Set diagonal to NA for a nicer plot
# diag(plotTOM) = NA;
# # Call the plot function
# TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
#############################
# Correlation within modules# from Strand et al, 2021
#############################
corr_within_module <- function(omics_data, network, module_x = 1){
idx.omics_data <- which(network$colors == module_x)
idx.me <- which(colnames(network$MEs) == paste0("ME",module_x))
kME_x <- bicor(omics_data[,idx.omics_data], network$MEs[,idx.me], maxPOutliers = 0.05)
kME_x}
ggplot.list <- list()
for(m in colnames(network$MEs)){
h <- as.numeric(sub("ME","", m))
data.frame(x = suppressWarnings(corr_within_module(omics_data = omics_data, network = network, module_x = h))) %>%
ggplot() +
geom_histogram(aes(x), fill = labels2colors(h), color = "black", alpha = 0.5, bins = 20) +
xlim(-1, 1) +
xlab("ASV correlation")+
ggtitle(paste0(prefix,m)) -> da_plot
ggplot.list[[m]] <- da_plot}
ggplot.list <- ggplot.list[ggplot.list %>% names() %>% sub("ME", "", .) %>% as.numeric() %>% order()]
cowplot::plot_grid(plotlist = ggplot.list, ncol = 3) -> density_all_plot
density_all_plot
ggsave(density_all_plot, filename = paste(paste(save_name_RNA_Liver, "WithinModuleCorrelation", sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 20)
##############
#Summary Plot#
##############
cowplot::plot_grid(si_mc_plot, RNAModuleSizePlot, labels = c("","C"), ncol = 2) -> part_1
cowplot::plot_grid(part_1, density_eigen, labels = c("", ""), ncol = 1, rel_widths = c(1,1)) -> part_2
ggsave(part_2, filename = paste(paste(save_name_RNA_Liver, "RNA_Liver_Network", Date, sep="_"),".png", sep=""), path = pathPlots , device='png', dpi=300, width = 8,
height = 8)
part_2
###################
#Self made Heatmap#
###################
#https://bioinformaticsworkbook.org/tutorials/wgcna.html#gsc.tab=0
# Define numbers of genes and samples
MEs = orderMEs(RNA_Liver_WGCNA$RNA_Liver_MEs)
names(MEs) = names(MEs) %>% gsub("ME","", .) %>% paste("RNA",., sep="")
moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
# Add treatment names
MEs$treatment = row.names(MEs)
mat <- as.data.frame(t(moduleTraitCor))
mat$traits <- rownames(mat)
# tidy & plot data
module_order = names(MEs)
mME = mat %>%
pivot_longer(-traits) %>%
mutate(#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(t(signif(moduleTraitCor, 2)))
textMatrixLong$traits <- rownames(textMatrixLong)
textMatrixLong = textMatrixLong %>%
pivot_longer(-traits) %>%
mutate(
#name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong <- as.data.frame(textMatrixLong)
textMatrixLong2 <- as.data.frame(t(signif(moduleTraitPvalue, 1)))
textMatrixLong2$traits <- rownames(textMatrixLong2)
textMatrixLong2 = textMatrixLong2%>%
pivot_longer(-traits) %>%
mutate(
name = gsub("ME", "", name),
name = factor(name, levels = module_order))
textMatrixLong2 <- as.data.frame(textMatrixLong2)
## add gene counts per module
genesmod<- as.data.frame(moduleLabels)
genesmod$genes <- rownames(genesmod)
genesmod$Modules <- paste("RNA",genesmod$moduleLabels, sep="") #labels2colors(genesmod$moduleLabels)
ModCount <- as.data.frame(genesmod %>%
dplyr::group_by(Modules) %>%
dplyr::summarise(n = n()) )
ModCount <- ModCount[order(as.numeric(ModCount$n), decreasing = T),]
HM <- mME %>% ggplot(., aes(x=traits, y=name, fill=value)) +
geom_tile() +
scale_fill_gradient2(
low = "steelblue1",
high = "red",
mid = "white",
midpoint = 0,
limit = c(-1,1)) +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name_RNA_Liver, "WGCNA-ModuleHeatmap", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 8, height = 6)
mME<- mME%>% mutate(Category = case_when((traits %in% c(
"O2",
"Salinity",
"SecchiDepth"
)) ~ "Abiotics",
(traits %in% c(
"FultonK",
"Length",
"Weight",
"StomachContent",
"HSI",
"SSI")) ~ "Physiology")); Order<- c("Abiotics", "Physiology")
HM <- mME %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
geom_tile() +
scale_fill_gradientn(
colours = c("steelblue1", "white", "red"),
limit = c(-1,1),
values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
scale_x_discrete(limits=ModCount$Modules, labels=paste(ModCount$Modules, ": ", ModCount$n)) +
facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
theme(axis.text.x = element_text(angle=90)) +
labs(title = "Module-trait Relationships", y = "Modules", fill="corr") +
geom_text(aes(label=textMatrixLong$value), position=position_nudge(y=0.2),
size=2.5, colour="grey20") +
geom_text(aes(label=paste0("(",textMatrixLong2$value,")")), position=position_nudge(y=-0.2),
colour="grey20", size=2.5) +
theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
plot(A)
ggsave(A, filename = paste(paste(save_name_RNA_Liver, "WGCNA-ModuleHeatmap-2", Date, sep="_"),".png", sep=""), path = pathPlots, device='png', dpi=300, width = 13, height = 7)
##################
#Specific Modules#
##################
# Module <- "steelblue"
# HM <- mME[mME$name == Module,] %>% ggplot(., aes(x=name, y=factor(traits, levels=traitData), fill=value)) +
# geom_tile() +
# scale_fill_gradientn(
# colours = c("steelblue1", "white", "red"),
# limit = c(-1,1),
# values = scales::rescale(c(-1, -0.3, 0, 0.3, 1))) +
# # scale_fill_gradient2(
# # low = "steelblue1",
# # high = "red",
# # mid = "white",
# # midpoint = 0,
# # limit = c(-1,1))
# #facet_grid(factor(mME$Category, levels=Order), scales = "free", space = "free") +
# theme(axis.text.x = element_text(angle=90)) +
# labs(title = "", y = "Modules", fill="corr") +
# geom_text(aes(label=textMatrixLong[textMatrixLong$name == Module,]$value), position=position_nudge(y=0.2),
# size=3, colour="grey20") +
# geom_text(aes(label=paste0("(",textMatrixLong2[textMatrixLong2$name == Module,]$value,")")), position=position_nudge(y=-0.2),
# colour="grey20", size=3) +
# theme_minimal() + atheme + theme(axis.title.x = element_blank()) +
# theme( panel.grid.major = element_blank(),panel.grid.minor = element_blank())
# prow <- cowplot::plot_grid(HM, labels = c(""), ncol = 1)
# A<- plot_grid(prow, ncol = 1, rel_heights = c(0.02, 0.05, 0.98))
# plot(A)
# ggsave(A, filename = paste("WGCNA-ModuleHeatmap", Module, sep="_"), path = pathPlots,
# device='png', dpi=300, width = 2.8,height = 7)
RNA_Liver_WGCNA<- readRDS(file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List", Date, sep="_"), ".rds", sep=""))))
InterestingComparison <- RNA_Liver_WGCNA$RNA_Liver_MEs
Module_Genes_list <- list()
for (MODULE in names(InterestingComparison)) {
a <- length(Module_Genes_list)
ExpressionSet <- RNA_Liver_WGCNA$RNA_Liver_omics_data
moduleLabels <- RNA_Liver_WGCNA$RNA_Liver_moduleLabels
Annotations <- SLUCGeneManual
genes_of_interest <- as.data.frame(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))]))
print(head(names(ExpressionSet[moduleLabels == paste(sub("ME", "", MODULE))])))
colnames(genes_of_interest) <- "rowname"
print(paste("Module genes before Conversion to EntrezIDs"))
print(paste(MODULE, dim(genes_of_interest)[1]))
########################################
#Add annotated Gene-Names to omics data#
########################################
#print(paste("Lost by double dot to dot conversion somwhere in the pipeline"))
#print(genes_of_interest$rowname[genes_of_interest$rowname %in% SLUCGeneManual$rowname == FALSE])
AnnotationDbi_genes <-dplyr::left_join(genes_of_interest, SLUCGeneManual)
AnnotationDbi_genes$GeneSymbolHS <- AnnotationDbi_genes$Human_SYMBOL_Manual
AnnotationDbi_genes2 <- as.data.frame(AnnotationDbi_genes$Human_SYMBOL_Manual)
colnames(AnnotationDbi_genes2) <- "GeneSymbolHS"
library(org.Hs.eg.db)
HS<-AnnotationDbi::select(org.Hs.eg.db, AnnotationDbi_genes2$GeneSymbolHS, "ENTREZID","SYMBOL") #Get EntrezIDs from Symbol
#CHECK!#
subset(HS, is.na(HS$ENTREZID))
dim(subset(HS, is.na(HS$ENTREZID)))
AnnotationDbi_genes2 <- dplyr::left_join(AnnotationDbi_genes2, HS, by=c("GeneSymbolHS" = "SYMBOL"))
dedup_ids = AnnotationDbi_genes2[!duplicated(AnnotationDbi_genes2[c("ENTREZID")]),]
dedup_ids <- dedup_ids[!is.na(dedup_ids$ENTREZID),]
print(paste("Module genes After Conversion to EntrezIDs"))
print(paste(MODULE, dim(dedup_ids)[1]))
#print(head(dedup_ids))
A<- dplyr::left_join(AnnotationDbi_genes[c("Geneid", "GeneSymbolHS")], dedup_ids)
A <- A[complete.cases(A$Geneid), ]
Module_Genes_list[[a+1]] <- A
names(Module_Genes_list)[[a+1]] <- paste("GeneAnno",MODULE, sep="_")
}
## [1] "si.ch211.235e9.8" "LOC116042180" "surf2" "ass1"
## [5] "exosc2" "zmat5"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME9 519"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME9 462"
## [1] "nocta" "LOC116042192" "enpp6" "csnk1a1" "fbxo38"
## [6] "matr3l1.2"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME1 3334"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME1 2690"
## [1] "LOC116035151" "rab11fip1b" "LOC116035135" "rps20" "bco1"
## [6] "rpl7"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME6 716"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME6 604"
## [1] "glis3" "si.dkey.106l3.7" "mtfr1" "hhatlb"
## [5] "LOC116052657" "zic3"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME23 131"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME23 105"
## [1] "zmynd19" "kcnk4a" "LOC116052600" "armh1" "nipbla"
## [6] "rab11fip1a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME31 88"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME31 78"
## [1] "LOC116051164" "maml1" "mgat4b" "nfkb1" "rab14l"
## [6] "myo1hb"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME7 696"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME7 614"
## [1] "LOC116042168" "LOC116042176" "LOC116035015" "LOC116052262" "grxcr1a"
## [6] "LOC116052444"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME16 207"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME16 162"
## [1] "osbp2" "LOC116038048" "nsdhl" "si.ch73.43g23.1"
## [5] "LOC116036481" "LOC116036640"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME28 105"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME28 71"
## [1] "LOC116042212" "LOC116051161" "ddx54" "nup155" "orai1b"
## [6] "drg1"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME5 758"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME5 672"
## [1] "LOC116055762" "LOC116052266" "fgf13a" "arhgef37" "slc7a2"
## [6] "LOC116063251"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME32 72"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME32 52"
## [1] "arap3" "b4galt1l" "slc44a1b" "wbp1" "ric1"
## [6] "LOC116034915"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME10 418"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME10 364"
## [1] "slc27a4" "LOC116052570" "rnf20" "efnb1" "abhd17b"
## [6] "ccser1"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME36 59"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME36 55"
## [1] "traf2b" "pdlim5b" "slc20a1b" "nt5c2l1" "arhgap29a" "pdk3a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME18 166"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME18 146"
## [1] "fubp3" "spata13" "fosl1a" "tspan17" "LOC116062520"
## [6] "LOC116053905"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME24 129"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME24 120"
## [1] "LOC118496169" "wsb2" "evpla" "LOC116042958" "slc25a55a"
## [6] "klhdc8a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME38 33"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME38 28"
## [1] "LOC116054943" "arvcfa" "zgc.56231" "LOC116053930" "hip1rb"
## [6] "LOC116056599"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME26 110"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME26 80"
## [1] "LOC116052304" "slu7" "LOC118494832" "nyap2a" "folr"
## [6] "barx1"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME39 32"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME39 23"
## [1] "LOC116042200" "LOC116060251" "coro1cb" "LOC116038006" "LOC116037943"
## [6] "cpa6"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME14 211"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME14 158"
## [1] "immt" "LOC116042209" "canx" "LOC116058051" "tmem129"
## [6] "rnf130"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME2 2571"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME2 2250"
## [1] "ndufc1" "polr2b" "LOC116035023" "LOC118493028" "LOC116035002"
## [6] "plgrkt"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME8 553"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME8 501"
## [1] "vipr1b" "LOC116038046" "spata4" "LOC116052506" "LOC116052667"
## [6] "nme5"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME20 142"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME20 93"
## [1] "LOC116067369" "LOC116060837" "LOC116034991" "tescb" "taok3b"
## [6] "rhof"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME4 1013"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME4 644"
## [1] "LOC116035126" "LOC116034916" "LOC118495038" "LOC116052334" "tmem265"
## [6] "LOC116052636"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME27 106"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME27 75"
## [1] "LOC116056516" "gdnfa" "stard7" "LOC116048902"
## [5] "stau2" "si.dkey.237h12.3"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME19 148"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME19 116"
## [1] "cplane1" "sardh" "fam163ba" "nktr" "vill" "f9a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME13 230"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME13 201"
## [1] "adora2ab" "phykpl" "qdpra" "naaladl1" "phf10" "lipca"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME34 61"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME34 52"
## [1] "neurog1" "LOC116060826" "ssh1b" "cabp1b" "LOC116048878"
## [6] "armc3"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME17 190"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME17 137"
## [1] "pttg1" "kmt5ab" "cenpf" "prc1b"
## [5] "plk1" "si.dkey.23f9.4"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME37 56"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME37 49"
## [1] "kcnn2" "LOC116055747" "hs6st2" "LOC116036535" "f2rl2"
## [6] "ube2z"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME33 69"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME33 52"
## [1] "LOC116042171" "npdc1a" "dok6" "LOC116037979" "lingo2"
## [6] "zgc.153018"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME15 210"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME15 152"
## [1] "LOC118496450" "LOC116045012" "LOC116054311" "LOC116062328" "nkpd1"
## [6] "galr1b"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME35 59"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME35 42"
## [1] "tmem119b" "gpat4" "snx24" "pigm" "mars2" "pcdh19"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME12 251"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME12 200"
## [1] "LOC116035179" "smtna" "pde6b" "pou5f3" "clic3"
## [6] "tfap2a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME29 93"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME29 74"
## [1] "LOC116035122" "spag6" "cbln2b" "si.ch73.22a13.3"
## [5] "smtnl1" "adad1"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME30 92"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME30 71"
## [1] "pcdh18b" "LOC118496138" "tgfbi" "atoh8" "LOC116034939"
## [6] "LOC116035088"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME11 401"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME11 338"
## [1] "LOC116046329" "eya1" "LOC116064437" "zgc.66433" "LOC116053883"
## [6] "tcf7l1b"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME22 134"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME22 121"
## [1] "LOC116035134" "cd226" "c1h8orf34" "LOC116052527" "LOC118492899"
## [6] "LOC118495324"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME21 140"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME21 100"
## [1] "slc7a11" "slitrk2" "sapcd2" "LOC116034992" "LOC116035152"
## [6] "zdhhc8a"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME3 2354"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME3 1819"
## [1] "rc3h2" "LOC116035127" "LOC116048911" "LOC116055741" "pip4k2aa"
## [6] "LOC116052523"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME25 125"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME25 104"
## [1] "LOC116051166" "LOC116051167" "LOC118494724" "LOC116042187" "LOC116042190"
## [6] "LOC116042216"
## [1] "Module genes before Conversion to EntrezIDs"
## [1] "ME0 11284"
## [1] "Module genes After Conversion to EntrezIDs"
## [1] "ME0 4949"
Gene_counts <- list()
for (i in names(Module_Genes_list)) {
a <- length(Gene_counts)
g <- sub("GeneAnno_ME", "", i)
A <- length(unique(na.omit(Module_Genes_list[[i]]$ENTREZID)))
B <- length(na.omit(Module_Genes_list[[i]]$Geneid))
C <- table(RNA_Liver_WGCNA$RNA_Liver_moduleLabels == g)[2]
module_summary <- data.frame(Module = i, Module_Size = C, EnrezID = A)
Gene_counts[[a+1]] <- module_summary}
print(do.call(rbind, Gene_counts))
## Module Module_Size EnrezID
## TRUE GeneAnno_ME9 519 462
## TRUE1 GeneAnno_ME1 3334 2690
## TRUE2 GeneAnno_ME6 716 604
## TRUE3 GeneAnno_ME23 131 105
## TRUE4 GeneAnno_ME31 88 78
## TRUE5 GeneAnno_ME7 696 614
## TRUE6 GeneAnno_ME16 207 162
## TRUE7 GeneAnno_ME28 105 71
## TRUE8 GeneAnno_ME5 758 672
## TRUE9 GeneAnno_ME32 72 52
## TRUE10 GeneAnno_ME10 418 364
## TRUE11 GeneAnno_ME36 59 55
## TRUE12 GeneAnno_ME18 166 146
## TRUE13 GeneAnno_ME24 129 120
## TRUE14 GeneAnno_ME38 33 28
## TRUE15 GeneAnno_ME26 110 80
## TRUE16 GeneAnno_ME39 32 23
## TRUE17 GeneAnno_ME14 211 158
## TRUE18 GeneAnno_ME2 2571 2250
## TRUE19 GeneAnno_ME8 553 501
## TRUE20 GeneAnno_ME20 142 93
## TRUE21 GeneAnno_ME4 1013 644
## TRUE22 GeneAnno_ME27 106 75
## TRUE23 GeneAnno_ME19 148 116
## TRUE24 GeneAnno_ME13 230 201
## TRUE25 GeneAnno_ME34 61 52
## TRUE26 GeneAnno_ME17 190 137
## TRUE27 GeneAnno_ME37 56 49
## TRUE28 GeneAnno_ME33 69 52
## TRUE29 GeneAnno_ME15 210 152
## TRUE30 GeneAnno_ME35 59 42
## TRUE31 GeneAnno_ME12 251 200
## TRUE32 GeneAnno_ME29 93 74
## TRUE33 GeneAnno_ME30 92 71
## TRUE34 GeneAnno_ME11 401 338
## TRUE35 GeneAnno_ME22 134 121
## TRUE36 GeneAnno_ME21 140 100
## TRUE37 GeneAnno_ME3 2354 1819
## TRUE38 GeneAnno_ME25 125 104
## TRUE39 GeneAnno_ME0 11284 4949
RNA_Liver_WGCNA$RNA_Liver_GeneAnno <- Module_Genes_list
saveRDS(RNA_Liver_WGCNA, file = paste0(file.path(path_Output_WGCNA, paste(paste(save_name_RNA_Liver, "List_3", Date, sep="_"), ".rds", sep=""))))
#-